From 7cbca79f1676da3bead00acaba44157dab5de05c Mon Sep 17 00:00:00 2001
From: vahidrezanezhad <vahid631983@gmail.com>
Date: Fri, 1 Dec 2023 23:40:47 +0100
Subject: [PATCH] replacing images contour with bounding box

---
 qurator/eynollah/eynollah.py | 32 +++++++++++++++++---------------
 qurator/eynollah/writer.py   | 16 ++++++++++++----
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index 5a8adeb..e3e3a20 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -1693,17 +1693,18 @@ def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_
         
         image_boundary_of_doc = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1]))
         
-        image_boundary_of_doc[:20, :] = 1
-        image_boundary_of_doc[text_regions_p_true.shape[0]-20:text_regions_p_true.shape[0], :] = 1
+        ###image_boundary_of_doc[:6, :] = 1
+        ###image_boundary_of_doc[text_regions_p_true.shape[0]-6:text_regions_p_true.shape[0], :] = 1
         
-        image_boundary_of_doc[:, :20] = 1
-        image_boundary_of_doc[:, text_regions_p_true.shape[1]-20:text_regions_p_true.shape[1]] = 1
+        ###image_boundary_of_doc[:, :6] = 1
+        ###image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1
         
         #plt.imshow(image_boundary_of_doc)
         #plt.show()
         
         polygons_of_images_fin = []
         for ploy_img_ind in polygons_of_images:
+            """
             test_poly_image = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1]))
             test_poly_image = cv2.fillPoly(test_poly_image, pts = [ploy_img_ind], color=(1,1,1))
             
@@ -1713,20 +1714,21 @@ def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_
             test_poly_image_intersected_area = test_poly_image_intersected_area.sum()
             
             if test_poly_image_intersected_area==0:
-                polygons_of_images_fin.append(ploy_img_ind)
+                ##polygons_of_images_fin.append(ploy_img_ind)
                 
-                #x, y, w, h = cv2.boundingRect(ploy_img_ind)
-                #box = [x, y, w, h]
-                #_, page_coord = crop_image_inside_box(box, text_regions_p_true)
+                x, y, w, h = cv2.boundingRect(ploy_img_ind)
+                box = [x, y, w, h]
+                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
                 #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
                 
-                #polygons_of_images_fin.append(np.array(cont_page))
-            #plt.imshow(test_poly_image)
-            #plt.show()
-            
-            
-            
-        
+                polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
+            """
+            x, y, w, h = cv2.boundingRect(ploy_img_ind)
+            box = [x, y, w, h]
+            _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
+            #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
+            
+            polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
         
         return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
     def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py
index f537f65..4487af5 100644
--- a/qurator/eynollah/writer.py
+++ b/qurator/eynollah/writer.py
@@ -172,10 +172,18 @@ def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, o
             page.add_ImageRegion(img_region)
             points_co = ''
             for lmm in range(len(found_polygons_text_region_img[mm])):
-                points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
-                points_co += ','
-                points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
-                points_co += ' '
+                try:
+                    points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
+                    points_co += ','
+                    points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
+                    points_co += ' '
+                except:
+
+                    points_co +=  str(int((found_polygons_text_region_img[mm][lmm][0] + page_coord[2])/ self.scale_x  ))
+                    points_co += ','
+                    points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0])/ self.scale_y  ))
+                    points_co += ' '
+                    
             img_region.get_Coords().set_points(points_co[:-1])
             
         for mm in range(len(polygons_lines_to_be_written_in_xml)):