Skip to content

Commit

Permalink
The contours of images can now be written to an XML file
Browse files Browse the repository at this point in the history
  • Loading branch information
vahidrezanezhad committed Nov 28, 2023
1 parent 364ccac commit aa41e4d
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 11 deletions.
4 changes: 2 additions & 2 deletions qurator/eynollah/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ def main(
if textline_light and not light_version:
print('Error: You used -tll to enable light textline detection but -light is not enabled')
sys.exit(1)
if extract_only_images and not ( save_images and enable_plotting):
print('Error: You used -eoi to enable extract images only mode but did not enable plotting with -ep and providing an output directory with -si')
if extract_only_images and (allow_enhancement or allow_scaling or light_version) :
print('Error: You used -eoi which can not be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae')
sys.exit(1)
eynollah = Eynollah(
image_filename=image,
Expand Down
48 changes: 39 additions & 9 deletions qurator/eynollah/eynollah.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,8 +624,11 @@ def resize_and_enhance_image_with_column_classifier(self,light_version):
image_res = np.copy(img)
is_image_enhanced = False
else:
img_new, num_column_is_classified = self.calculate_width_height_by_columns_extract_only_images(img, num_col, width_early, label_p_pred)
image_res = np.copy(img_new)
#img_new, num_column_is_classified = self.calculate_width_height_by_columns_extract_only_images(img, num_col, width_early, label_p_pred)
#image_res = np.copy(img_new)
#is_image_enhanced = True
num_column_is_classified = True
image_res = np.copy(img)
is_image_enhanced = False

self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
Expand Down Expand Up @@ -1621,16 +1624,27 @@ def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_t
box_sub.put(boxes_sub_new)

def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier):
self.logger.debug("enter get_regions_light_v")
self.logger.debug("enter get_regions_extract_images_only")
erosion_hurts = False
img_org = np.copy(img)
img_height_h = img_org.shape[0]
img_width_h = img_org.shape[1]

#model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)

if num_col_classifier == 1:
img_w_new = 700
elif num_col_classifier == 2:
img_w_new = 900
elif num_col_classifier == 3:
img_w_new = 1500
elif num_col_classifier == 4:
img_w_new = 1800
elif num_col_classifier == 5:
img_w_new = 2200
elif num_col_classifier == 6:
img_w_new = 2500
img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)

img_resized = np.copy(img)
img_resized = resize_image(img,img_h_new, img_w_new )



Expand All @@ -1644,6 +1658,11 @@ def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_
#plt.show()

prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )

image_page, page_coord, cont_page = self.extract_page()


prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]


prediction_regions_org=prediction_regions_org[:,:,0]
Expand Down Expand Up @@ -1695,14 +1714,21 @@ def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_

if test_poly_image_intersected_area==0:
polygons_of_images_fin.append(ploy_img_ind)

#x, y, w, h = cv2.boundingRect(ploy_img_ind)
#box = [x, y, w, h]
#_, page_coord = crop_image_inside_box(box, text_regions_p_true)
#cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))

#polygons_of_images_fin.append(np.array(cont_page))
#plt.imshow(test_poly_image)
#plt.show()





return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin
return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
self.logger.debug("enter get_regions_light_v")
erosion_hurts = False
Expand Down Expand Up @@ -2554,6 +2580,7 @@ def get_tables_from_model(self, img, num_col_classifier):
prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20)
prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20)
return prediction_table_erode.astype(np.int16)

def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts):
img_g = self.imread(grayscale=True, uint8=True)

Expand Down Expand Up @@ -2970,13 +2997,16 @@ def run(self):
img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
self.logger.info("Enhancing took %.1fs ", time.time() - t0)

text_regions_p_1 ,erosion_hurts, polygons_lines_xml,polygons_of_images = self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
#self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
text_regions_p_1 ,erosion_hurts, polygons_lines_xml,polygons_of_images,image_page, page_coord, cont_page = self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)

pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [])

if self.plotter:
self.plotter.write_images_into_directory(polygons_of_images, img_res)
#plt.imshow(text_regions_p_1)
#plt.show()

self.writer.write_pagexml(pcgts)

else:
img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
Expand Down

0 comments on commit aa41e4d

Please sign in to comment.