From 5fdc6d4fa48d25309d1a774b04e79debbf797e75 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 14 Oct 2023 09:05:05 +0200 Subject: [PATCH 01/64] integration of machine based reading order detection --- qurator/eynollah/eynollah.py | 222 +++++++++++++++++++++++++++--- qurator/eynollah/utils/contour.py | 11 +- 2 files changed, 209 insertions(+), 24 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 4b1b5e9..b83db98 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -78,6 +78,7 @@ from .plot import EynollahPlotter from .writer import EynollahXmlWriter +MIN_AREA_REGION = 0.0005 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -225,6 +226,7 @@ def __init__( self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" + self.model_reading_order_machine_dir = dir_models + "/model_6_reading_order_machine_based" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -246,6 +248,7 @@ def __init__( self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) self.ls_imgs = os.listdir(self.dir_in) @@ -264,6 +267,7 @@ def __init__( self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) + self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) self.ls_imgs = os.listdir(self.dir_in) @@ -1647,9 +1651,39 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + + #for jv in range(1): + #print(jv, hir_lines_xml[0][232][3]) + #test_khat = np.zeros(prediction_regions_org.shape) + + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + + + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + #sys.exit() + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -1785,7 +1819,7 @@ def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = 
textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -1853,7 +1887,7 @@ def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) @@ -2821,13 +2855,157 @@ def our_load_model(self, model_file): model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model + + def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + + #print(text_regions_p.shape) + y_len = text_regions_p.shape[0] + x_len = text_regions_p.shape[1] + + img_poly = np.zeros((y_len,x_len), dtype='uint8') + + unique_pix = np.unique(text_regions_p) + #print(unique_pix, 'unique_pix') + + #for pix in unique_pix: + #print(pix) + #plt.imshow((text_regions_p[:,:]==pix)*1 ) + #plt.show() + + img_poly[text_regions_p[:,:]==1] = 1 + img_poly[text_regions_p[:,:]==2] = 2 + img_poly[text_regions_p[:,:]==3] = 4 + img_poly[text_regions_p[:,:]==6] = 5 + + #plt.imshow(text_regions_p) + #plt.show() + + + #plt.imshow(img_poly) + #plt.show() + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + + height1 =672#448 + width1 = 448#224 + + height2 =672#448 + width2= 448#224 + + height3 =672#448 + width3 = 448#224 + + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + + img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') + + for j in range(len(cy_main)): + #print(j, int(y_max_main[j]), x_min_main[j], x_max_main[j] ) + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + #plt.imshow(img_header_and_sep[:,:]) + #plt.show() + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + #id_all_text = id_paragraph + id_header + + #texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ] + #texts_corr_order_index_int = [int(x) for x in texts_corr_order_index] + + #co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area) + + labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') + for i in range(len(co_text_all)): + img_label = np.zeros((y_len,x_len,3),dtype='uint8') + img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) + labels_con[:,:,i] = img_label[:,:,0] + + + img3= np.copy(img_poly) + + labels_con = resize_image(labels_con, height1, width1) + + img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + + img3= resize_image (img3, height3, width3) + + img3 = 
img3.astype(np.uint16) + + + #plt.imshow(img3) + #plt.show() + + order_matrix = np.zeros((labels_con.shape[2], labels_con.shape[2]))-1 + + for i in range(labels_con.shape[2]): + for j in range(labels_con.shape[2]): + if j>i: + img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) + img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) + #img1 = img1.astype(np.uint16) + #img2 = img2.astype(np.uint16) + + img2[:,:,0][img3[:,:]==5] = 2 + img2[:,:,0][img_header_and_sep[:,:]==1] = 3 + + + + img1[:,:,0][img3[:,:]==5] = 2 + img1[:,:,0][img_header_and_sep[:,:]==1] = 3 + + + #plt.imshow(labels_con[:,:,i]) + #plt.show() + + #plt.imshow(img2[:,:,0]) + #plt.show() + + + #plt.imshow(img1[:,:,0]) + #plt.show() + + #sys.exit() + input_1= np.zeros( (height1, width1,3)) + + input_1[:,:,0] = img1[:,:,0]/3. + input_1[:,:,2] = img2[:,:,0]/3. + input_1[:,:,1] = img3[:,:]/5. + + #y_pr=model.predict([img1.reshape(1,height1,width1,3) , img2.reshape(1,height2,width2,3),img3.reshape(1,height3,width3,3) ], verbose=2) + y_pr=model_ro_machine.predict(input_1.reshape(1,height1,width1,3) , verbose=0) + #print(y_pr) + + if y_pr>=0.5: + order_class = 1 + else: + order_class = 0 + + order_matrix[i,j] = y_pr#order_class + order_matrix[j,i] = 1-y_pr#int( 1 - order_class) + + + sum_mat = np.sum(order_matrix, axis=1) + index_sort = np.argsort(sum_mat) + index_sort = index_sort[::-1] + + print(index_sort) + REGION_ID_TEMPLATE = 'region_%04d' + order_of_texts = [] + id_of_texts = [] + for order, id_text in enumerate(index_sort): + order_of_texts.append(id_text) + id_of_texts.append( REGION_ID_TEMPLATE % order ) + + + return order_of_texts, id_of_texts def run(self): """ Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - + + self.reading_order_machine_based = True#True t0_tot = time.time() @@ -2896,7 +3074,7 @@ def run(self): text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - min_con_area = 0.000005 + ###min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) @@ -2906,8 +3084,8 @@ def run(self): areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) #self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -2983,8 +3161,8 @@ def run(self): areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] + contours_only_text_parent = [c for jz, c in 
enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) @@ -3086,21 +3264,33 @@ def run(self): self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() if self.full_layout: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts + + print(id_of_texts_tot,'id_of_texts_tot') + print(order_text_new,'order_text_new') + else: contours_only_text_parent_h = None - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in 
%.1fs", time.time() - t0) ##return pcgts diff --git a/qurator/eynollah/utils/contour.py b/qurator/eynollah/utils/contour.py index bac8235..53b39b5 100644 --- a/qurator/eynollah/utils/contour.py +++ b/qurator/eynollah/utils/contour.py @@ -44,8 +44,8 @@ def get_text_region_boxes_by_given_contours(contours): def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): found_polygons_early = list() - jv = 0 - for c in contours: + + for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -53,14 +53,12 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area area = polygon.area if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1: # and hierarchy[0][jv][3]==-1 : found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint)) - jv += 1 return found_polygons_early def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): found_polygons_early = list() - jv = 0 - for c in contours: + for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -73,7 +71,6 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]): # and hierarchy[0][jv][3]==-1 : # print(c[0][0][1]) found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32)) - jv += 1 return found_polygons_early def find_new_features_of_contours(contours_main): @@ -234,8 +231,6 @@ def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first): with Pool(cpu_count()) as p: cnts_org = p.starmap(loop_contour_image, [(index_l,cnts, img,slope_first) for index_l in range(len(cnts))]) - print(len(cnts_org),'lendiha') - return cnts_org def get_textregion_contours_in_org_image(cnts, img, slope_first): From 49c93149a49b103b6434fee79ef28517fa4b13f9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 10:01:28 +0200 Subject: [PATCH 02/64] machine based reading order inference with a variable batch size --- qurator/eynollah/eynollah.py | 96 +++++++++++++++--------------------- 1 file changed, 41 insertions(+), 55 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b83db98..35992c9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2857,32 +2857,20 @@ def our_load_model(self, model_file): return model def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): - - #print(text_regions_p.shape) y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] img_poly = np.zeros((y_len,x_len), dtype='uint8') unique_pix = np.unique(text_regions_p) - #print(unique_pix, 'unique_pix') - - #for pix in unique_pix: - #print(pix) - #plt.imshow((text_regions_p[:,:]==pix)*1 ) - #plt.show() + img_poly[text_regions_p[:,:]==1] = 1 img_poly[text_regions_p[:,:]==2] = 2 img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - #plt.imshow(text_regions_p) - #plt.show() - - - #plt.imshow(img_poly) - #plt.show() + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) height1 =672#448 @@ -2900,19 +2888,11 @@ def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_on img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') for 
j in range(len(cy_main)): - #print(j, int(y_max_main[j]), x_min_main[j], x_max_main[j] ) img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - #plt.imshow(img_header_and_sep[:,:]) - #plt.show() co_text_all = contours_only_text_parent + contours_only_text_parent_h - #id_all_text = id_paragraph + id_header - - #texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ] - #texts_corr_order_index_int = [int(x) for x in texts_corr_order_index] - #co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area) labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') for i in range(len(co_text_all)): @@ -2932,63 +2912,69 @@ def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_on img3 = img3.astype(np.uint16) - #plt.imshow(img3) - #plt.show() - order_matrix = np.zeros((labels_con.shape[2], labels_con.shape[2]))-1 + inference_bs = 6 + tot_counter = 1 + batch_counter = 0 + i_indexer = [] + j_indexer =[] + + input_1= np.zeros( (inference_bs, height1, width1,3)) + tot_iteration = int( ( labels_con.shape[2]*(labels_con.shape[2]-1) )/2. ) + full_bs_ite= tot_iteration//inference_bs + last_bs = tot_iteration % inference_bs + + #print(labels_con.shape[2],"number of regions for reading order") for i in range(labels_con.shape[2]): for j in range(labels_con.shape[2]): if j>i: img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) - #img1 = img1.astype(np.uint16) - #img2 = img2.astype(np.uint16) img2[:,:,0][img3[:,:]==5] = 2 img2[:,:,0][img_header_and_sep[:,:]==1] = 3 - - img1[:,:,0][img3[:,:]==5] = 2 img1[:,:,0][img_header_and_sep[:,:]==1] = 3 - #plt.imshow(labels_con[:,:,i]) - #plt.show() - - #plt.imshow(img2[:,:,0]) - #plt.show() - + i_indexer.append(i) + j_indexer.append(j) - #plt.imshow(img1[:,:,0]) - #plt.show() + input_1[batch_counter,:,:,0] = img1[:,:,0]/3. + input_1[batch_counter,:,:,2] = img2[:,:,0]/3. + input_1[batch_counter,:,:,1] = img3[:,:]/5. - #sys.exit() - input_1= np.zeros( (height1, width1,3)) + batch_counter = batch_counter+1 - input_1[:,:,0] = img1[:,:,0]/3. - input_1[:,:,2] = img2[:,:,0]/3. - input_1[:,:,1] = img3[:,:]/5. 
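# A minimal sketch (illustrative, with made-up scores) of the step this hunk
# reorganizes: however the pair predictions are batched, the final reading
# order still comes from the pairwise matrix, where order_matrix[i, j] holds
# the model's probability that region i precedes region j.
import numpy as np
order_matrix = np.array([[0.0, 0.9, 0.8, 0.7],
                         [0.1, 0.0, 0.7, 0.8],
                         [0.2, 0.3, 0.0, 0.9],
                         [0.3, 0.2, 0.1, 0.0]])
sum_mat = np.sum(order_matrix, axis=1)    # total "comes before" evidence per region
index_sort = np.argsort(sum_mat)[::-1]    # strongest evidence first = read first
print(index_sort)                         # [0 1 2 3]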
- - #y_pr=model.predict([img1.reshape(1,height1,width1,3) , img2.reshape(1,height2,width2,3),img3.reshape(1,height3,width3,3) ], verbose=2) - y_pr=model_ro_machine.predict(input_1.reshape(1,height1,width1,3) , verbose=0) - #print(y_pr) + if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): + y_pr=model_ro_machine.predict(input_1 , verbose=0) - if y_pr>=0.5: - order_class = 1 - else: - order_class = 0 + if batch_counter==inference_bs: + iteration_batches = inference_bs + else: + iteration_batches = last_bs + for jb in range(iteration_batches): + if y_pr[jb][0]>=0.5: + order_class = 1 + else: + order_class = 0 + + order_matrix[i_indexer[jb],j_indexer[jb]] = y_pr[jb][0]#order_class + order_matrix[j_indexer[jb],i_indexer[jb]] = 1-y_pr[jb][0]#int( 1 - order_class) + + batch_counter = 0 - order_matrix[i,j] = y_pr#order_class - order_matrix[j,i] = 1-y_pr#int( 1 - order_class) + i_indexer = [] + j_indexer = [] + tot_counter = tot_counter+1 sum_mat = np.sum(order_matrix, axis=1) index_sort = np.argsort(sum_mat) index_sort = index_sort[::-1] - print(index_sort) REGION_ID_TEMPLATE = 'region_%04d' order_of_texts = [] id_of_texts = [] @@ -3272,13 +3258,12 @@ def run(self): order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts - print(id_of_texts_tot,'id_of_texts_tot') - print(order_text_new,'order_text_new') else: contours_only_text_parent_h = None @@ -3291,6 +3276,7 @@ def run(self): contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts From 59c0d90e5af7ed3f1d3d8d7a78ecdcc17eb2fb59 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 10:17:46 +0200 Subject: [PATCH 03/64] machine based reading order inference & optimized algorithm --- qurator/eynollah/eynollah.py | 153 ++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 3 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 35992c9..63e71cb 100644 --- 
a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2855,7 +2855,6 @@ def our_load_model(self, model_file): model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model - def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -2983,6 +2982,154 @@ def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_on id_of_texts.append( REGION_ID_TEMPLATE % order ) + return order_of_texts, id_of_texts + + def update_list_and_return_first_biger_than_one_length(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): + list_inp.pop(index_element_to_be_updated) + if len(pr_list)>0: + list_inp.insert(index_element_to_be_updated, pr_list) + else: + index_element_to_be_updated = index_element_to_be_updated -1 + + list_inp.insert(index_element_to_be_updated+1, [innner_index_pr_pos]) + if len(pos_list)>0: + list_inp.insert(index_element_to_be_updated+2, pos_list) + + len_all_elements = [len(i) for i in list_inp] + list_len_bigger_1 = np.where(np.array(len_all_elements)>1) + list_len_bigger_1 = list_len_bigger_1[0] + + if len(list_len_bigger_1)>0: + early_list_bigger_than_one = list_len_bigger_1[0] + else: + early_list_bigger_than_one = -20 + return list_inp, early_list_bigger_than_one + def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + y_len = text_regions_p.shape[0] + x_len = text_regions_p.shape[1] + + img_poly = np.zeros((y_len,x_len), dtype='uint8') + + unique_pix = np.unique(text_regions_p) + + + img_poly[text_regions_p[:,:]==1] = 1 + img_poly[text_regions_p[:,:]==2] = 2 + img_poly[text_regions_p[:,:]==3] = 4 + img_poly[text_regions_p[:,:]==6] = 5 + + + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + + height1 =672#448 + width1 = 448#224 + + height2 =672#448 + width2= 448#224 + + height3 =672#448 + width3 = 448#224 + + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + + img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + + + labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') + for i in range(len(co_text_all)): + img_label = np.zeros((y_len,x_len,3),dtype='uint8') + img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) + labels_con[:,:,i] = img_label[:,:,0] + + + img3= np.copy(img_poly) + + labels_con = resize_image(labels_con, height1, width1) + + img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + + img3= resize_image (img3, height3, width3) + + img3 = img3.astype(np.uint16) + + inference_bs = 4 + input_1= np.zeros( (inference_bs, height1, width1,3)) + starting_list_of_regions = [] + starting_list_of_regions.append( list(range(labels_con.shape[2])) ) + index_update = 0 + index_selected = starting_list_of_regions[0] + #print(labels_con.shape[2],"number of regions for reading order") + while index_update>=0: + ij_list = starting_list_of_regions[index_update] + i = ij_list[0] + ij_list.pop(0) + + pr_list = [] + post_list = [] + + batch_counter = 0 + tot_counter = 1 + 
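# A minimal sketch of the partitioning idea behind this optimized variant:
# use the pairwise model as a comparator, take the first region of the current
# sublist as a pivot, split the rest into pr_list ("before") and post_list
# ("after"), and repeat on each sublist -- quicksort-style, so on average about
# n*log(n) model calls instead of the n*(n-1)/2 needed for the full matrix.
# The patch does this iteratively (via update_list_and_return_first_biger_than_one_length)
# so predictions can stay batched; cmp below is a hypothetical stand-in for the
# model's y_pr >= 0.5 decision.
def order_regions(indexes, cmp):
    if len(indexes) <= 1:
        return list(indexes)
    pivot, rest = indexes[0], indexes[1:]
    before = [j for j in rest if not cmp(j, pivot)]  # pr_list in the patch
    after = [j for j in rest if cmp(j, pivot)]       # post_list in the patch
    return order_regions(before, cmp) + [pivot] + order_regions(after, cmp)
print(order_regions([3, 1, 4, 0, 2], lambda i, j: i > j))  # [0, 1, 2, 3, 4]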
+ tot_iteration = len(ij_list) + full_bs_ite= tot_iteration//inference_bs + last_bs = tot_iteration % inference_bs + + jbatch_indexer =[] + for j in ij_list: + img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) + img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) + + img2[:,:,0][img3[:,:]==5] = 2 + img2[:,:,0][img_header_and_sep[:,:]==1] = 3 + + img1[:,:,0][img3[:,:]==5] = 2 + img1[:,:,0][img_header_and_sep[:,:]==1] = 3 + + jbatch_indexer.append(j) + + input_1[batch_counter,:,:,0] = img1[:,:,0]/3. + input_1[batch_counter,:,:,2] = img2[:,:,0]/3. + input_1[batch_counter,:,:,1] = img3[:,:]/5. + + batch_counter = batch_counter+1 + + if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): + y_pr=model_ro_machine.predict(input_1 , verbose=0) + + if batch_counter==inference_bs: + iteration_batches = inference_bs + else: + iteration_batches = last_bs + for jb in range(iteration_batches): + if y_pr[jb][0]>=0.5: + post_list.append(jbatch_indexer[jb]) + else: + pr_list.append(jbatch_indexer[jb]) + + batch_counter = 0 + jbatch_indexer = [] + + tot_counter = tot_counter+1 + + starting_list_of_regions, index_update = self.update_list_and_return_first_biger_than_one_length(index_update, i, pr_list, post_list,starting_list_of_regions) + + index_sort = [i[0] for i in starting_list_of_regions ] + + REGION_ID_TEMPLATE = 'region_%04d' + order_of_texts = [] + id_of_texts = [] + for order, id_text in enumerate(index_sort): + order_of_texts.append(id_text) + id_of_texts.append( REGION_ID_TEMPLATE % order ) + + return order_of_texts, id_of_texts def run(self): @@ -3252,7 +3399,7 @@ def run(self): if self.full_layout: if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) @@ -3268,7 +3415,7 @@ def run(self): else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) From 941d87328a45ad6df5df27c0a84a4b695de65c67 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 11:19:30 +0200 Subject: [PATCH 04/64] machine based reading order & works for not full layout case --- qurator/eynollah/eynollah.py | 84 ++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 63e71cb..c008476 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2881,16 +2881,17 @@ def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_on height3 =672#448 width3 = 448#224 - _, cy_main, x_min_main, x_max_main, 
y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') - - for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - - co_text_all = contours_only_text_parent + contours_only_text_parent_h + if contours_only_text_parent_h: + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + else: + co_text_all = contours_only_text_parent labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') @@ -2984,7 +2985,7 @@ def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_on return order_of_texts, id_of_texts - def update_list_and_return_first_biger_than_one_length(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): + def update_list_and_return_first_with_length_bigger_than_one(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): list_inp.pop(index_element_to_be_updated) if len(pr_list)>0: list_inp.insert(index_element_to_be_updated, pr_list) @@ -3030,16 +3031,17 @@ def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text height3 =672#448 width3 = 448#224 - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') - - for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - - co_text_all = contours_only_text_parent + contours_only_text_parent_h + if contours_only_text_parent_h: + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + else: + co_text_all = contours_only_text_parent labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') @@ -3118,7 +3120,7 @@ def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text tot_counter = tot_counter+1 - starting_list_of_regions, index_update = self.update_list_and_return_first_biger_than_one_length(index_update, i, pr_list, post_list,starting_list_of_regions) + starting_list_of_regions, index_update = self.update_list_and_return_first_with_length_bigger_than_one(index_update, i, pr_list, post_list,starting_list_of_regions) index_sort = [i[0] for i in starting_list_of_regions ] @@ -3138,7 +3140,7 @@ def run(self): """ self.logger.debug("enter run") - self.reading_order_machine_based = True#True + self.reading_order_machine_based = True#False#True#True t0_tot = time.time() @@ -3359,32 +3361,32 @@ def run(self): all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) pixel_lines = 6 + if not 
self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - if num_col_classifier >= 3: + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + + if not self.reading_order_machine_based: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - - - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) - else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, 
matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) #print(boxes_d,'boxes_d') #img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) From eac18c553d6829cbb6c3c0d6ca1572977a2b3243 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 13 Dec 2023 01:44:51 +0100 Subject: [PATCH 05/64] machine based reading order as an argument --- qurator/eynollah/cli.py | 8 ++++++++ qurator/eynollah/eynollah.py | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a2a2ad0..a422df9 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -133,6 +133,12 @@ is_flag=True, help="if this parameter set to true, this tool would ignore page extraction", ) +@click.option( + "--reading_order_machine_based/--heuristic_reading_order", + "-romb/-hro", + is_flag=True, + help="if this parameter set to true, this tool would apply machine based reading order detection", +) @click.option( "--log-level", "-l", @@ -160,6 +166,7 @@ def main( allow_scaling, headers_off, light_version, + reading_order_machine_based, ignore_page_extraction, log_level ): @@ -197,6 +204,7 @@ def main( headers_off=headers_off, light_version=light_version, ignore_page_extraction=ignore_page_extraction, + reading_order_machine_based=reading_order_machine_based, ) eynollah.run() #pcgts = eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c008476..5e06734 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -165,6 +165,7 @@ def __init__( headers_off=False, light_version=False, ignore_page_extraction=False, + reading_order_machine_based=False, override_dpi=None, logger=None, pcgts=None, @@ -181,6 +182,7 @@ def __init__( self.dir_in = dir_in self.dir_of_all = dir_of_all self.dir_save_page = dir_save_page + self.reading_order_machine_based = reading_order_machine_based self.dir_of_deskewed = dir_of_deskewed self.dir_of_deskewed = dir_of_deskewed self.dir_of_cropped_images=dir_of_cropped_images @@ -226,7 +228,7 @@ def __init__( self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" - self.model_reading_order_machine_dir = dir_models + "/model_6_reading_order_machine_based" + self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -3139,8 +3141,6 @@ def run(self): Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - - self.reading_order_machine_based = True#False#True#True t0_tot = time.time() From 514466883415f86ea90b6ef48a4e15187407ec05 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 17 Jul 2024 10:01:37 +0200 Subject: [PATCH 06/64] ocr engine first integration --- qurator/eynollah/cli.py | 8 + qurator/eynollah/eynollah.py | 295 ++++++++++++++++++++++++++++++++++- qurator/eynollah/writer.py | 15 +- 3 files changed, 313 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a422df9..833e904 100644 --- 
a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -139,6 +139,12 @@ is_flag=True, help="if this parameter set to true, this tool would apply machine based reading order detection", ) +@click.option( + "--do_ocr", + "-ocr/-noocr", + is_flag=True, + help="if this parameter set to true, this tool will try to do ocr", +) @click.option( "--log-level", "-l", @@ -167,6 +173,7 @@ def main( headers_off, light_version, reading_order_machine_based, + do_ocr, ignore_page_extraction, log_level ): @@ -205,6 +212,7 @@ def main( light_version=light_version, ignore_page_extraction=ignore_page_extraction, reading_order_machine_based=reading_order_machine_based, + do_ocr=do_ocr, ) eynollah.run() #pcgts = eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 5e06734..a505b0e 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -17,6 +17,16 @@ from ocrd_utils import getLogger import cv2 import numpy as np +from transformers import TrOCRProcessor +from PIL import Image +import torch +from difflib import SequenceMatcher as sq +from transformers import VisionEncoderDecoderModel +from numba import cuda +import copy +from scipy.signal import find_peaks +from scipy.ndimage import gaussian_filter1d + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" stderr = sys.stderr sys.stderr = open(os.devnull, "w") @@ -166,6 +176,7 @@ def __init__( light_version=False, ignore_page_extraction=False, reading_order_machine_based=False, + do_ocr=False, override_dpi=None, logger=None, pcgts=None, @@ -199,6 +210,7 @@ def __init__( self.headers_off = headers_off self.light_version = light_version self.ignore_page_extraction = ignore_page_extraction + self.ocr = do_ocr self.pcgts = pcgts if not dir_in: self.plotter = None if not enable_plotting else EynollahPlotter( @@ -233,6 +245,9 @@ def __init__( self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" + if self.ocr: + self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" + self.model_tables = dir_models + "/eynollah-tables_20210319" self.models = {} @@ -251,6 +266,10 @@ def __init__( self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) + if self.ocr: + self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") self.ls_imgs = os.listdir(self.dir_in) @@ -3135,6 +3154,223 @@ def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text return order_of_texts, id_of_texts + def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.2*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. 
+ common_window ) + + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>70: + print(len(peaks_real), 'len(peaks_real)') + + peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)] + + arg_sort = np.argsort(sum_smoothed[peaks_real]) + + arg_sort4 =arg_sort[::-1][:4] + + peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + + argsort_sorted = np.argsort(peaks_sort_4) + + first_4_sorted = peaks_sort_4[argsort_sorted] + y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] + #print(first_4_sorted,'first_4_sorted') + + arg_sortnew = np.argsort(y_4_sorted) + peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] ) + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1]) + #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peaks_final[0], peaks_final[1] + else: + pass + + + def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self,textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.06*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. + common_window ) + + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>70: + #print(len(peaks_real), 'len(peaks_real)') + + peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)] + + arg_max = np.argmax(sum_smoothed[peaks_real]) + + peaks_final = peaks_real[arg_max] + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peaks_final, peaks_final], [0, height-1]) + ##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peaks_final + else: + return None + def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(self,peaks_real, sum_smoothed, start_split, end_split): + peaks_real = peaks_real[(peaks_real<end_split) & (peaks_real>start_split)] + + arg_sort = np.argsort(sum_smoothed[peaks_real]) + + arg_sort4 =arg_sort[::-1][:4] + + peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + + argsort_sorted = np.argsort(peaks_sort_4) + + first_4_sorted = peaks_sort_4[argsort_sorted] + y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] + #print(first_4_sorted,'first_4_sorted') + + arg_sortnew = np.argsort(y_4_sorted) + peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) + return peaks_final[0] + + def return_start_and_end_of_common_text_of_textline_ocr_new(self,textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.15*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. + common_window ) + mid = int(width/2.)
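# A rough, self-contained illustration of the projection-profile heuristic the
# helpers above share: sum every pixel column, smooth the profile, and take the
# strongest peak near the centre -- bright columns on a light background are
# inter-word gaps, hence safe places to split an overly long textline before
# OCR. The toy image and the 0.44/0.56 window are illustration values; the
# patch derives its window from common_window fractions of the width.
import numpy as np
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
textline = np.full((32, 400), 255, dtype=np.uint8)  # white background
textline[:, 40:198] = 0     # fake ink left of a gap
textline[:, 203:360] = 0    # fake ink right of the gap
img_sum = textline.sum(axis=0).astype(float)
sum_smoothed = gaussian_filter1d(img_sum, 3)
peaks, _ = find_peaks(sum_smoothed, height=0)
width = textline.shape[1]
central = peaks[(peaks > width * 0.44) & (peaks < width * 0.56)]
split = central[np.argmax(sum_smoothed[central])]
print(split)  # a column inside the 198..202 gap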
+ + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>70: + peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, width1, mid+2) + + peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, mid-2, width2) + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peak_start, peak_start], [0, height-1]) + #plt.plot([peak_end, peak_end], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peak_start, peak_end + else: + pass + + def return_ocr_of_textline_without_common_section(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + + #width1 = int ( width/2. - common_window ) + #width2 = int ( width/2. + common_window ) + + + split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image, ind_tot) + if split_point: + image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + + #pixel_values1 = processor(image1, return_tensors="pt").pixel_values + #pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values + generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) + generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + #print(generated_text_merged,'generated_text_merged') + + #generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + #generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + + #generated_text = generated_text1 + ' ' + generated_text2 + generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] + + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + else: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + #print(generated_text,'generated_text') + #print('########################################') + return generated_text + def return_ocr_of_textline(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + + #width1 = int ( width/2. 
- common_window ) + #width2 = int ( width/2. + common_window ) + + try: + width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) + + image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) + + pixel_values1 = processor(image1, return_tensors="pt").pixel_values + pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + + match = sq(None, generated_text1, generated_text2).find_longest_match(0, len(generated_text1), 0, len(generated_text2)) + + generated_text = generated_text1 + generated_text2[match.b+match.size:] + except: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text + + def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): + textline_contour[:,0] = textline_contour[:,0] + box_ind[2] + textline_contour[:,1] = textline_contour[:,1] + box_ind[0] + return textline_contour def run(self): """ @@ -3398,6 +3634,7 @@ def run(self): if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() + if self.full_layout: if self.reading_order_machine_based: @@ -3425,11 +3662,67 @@ def run(self): contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + + if self.ocr: + + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + img_poly_on_img = np.copy(image_page) + + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + + 
img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + + ocr_textline_in_textregion.append(text_ocr) + + ##cv2.imwrite(str(ind_tot)+'.png', img_croped) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) + + else: + ocr_all_textlines = None + #print(ocr_all_textlines) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index f537f65..c69be9b 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -2,7 +2,7 @@ # pylint: disable=import-error from pathlib import Path import os.path - +import xml.etree.ElementTree as ET from .utils.xml import create_page_xml, xml_reading_order from .utils.counter import EynollahIdCounter @@ -12,6 +12,7 @@ CoordsType, PcGtsType, TextLineType, + TextEquivType, TextRegionType, ImageRegionType, TableRegionType, @@ -93,11 +94,13 @@ def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygo points_co += ' ' coords.set_points(points_co[:-1]) - def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter): + def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): self.logger.debug('enter serialize_lines_in_region') for j in range(len(all_found_textline_polygons[region_idx])): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) + if ocr_all_textlines_textregion: + textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) text_region.add_TextLine(textline) region_bboxes = all_box_coord[region_idx] points_co = '' @@ -140,7 +143,7 @@ def write_pagexml(self, pcgts): with open(out_fname, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, 
id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -159,7 +162,11 @@ def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, o Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)), ) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter) + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', From a62ae370c3ff37495383f8415620dc2cf5d44eb1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 02:21:01 +0200 Subject: [PATCH 07/64] new full layout model and early layout for 1&2 column images are integrated - light version --- qurator/eynollah/eynollah.py | 118 ++++++++++++++++++++++++++++++----- qurator/eynollah/writer.py | 16 ++++- 2 files changed, 114 insertions(+), 20 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index a505b0e..8032f1e 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -241,6 +241,8 @@ def __init__( self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" + self.model_region_dir_p_1_2_sp_np = dir_models + "/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -263,6 +265,8 @@ def __init__( self.model_bin = self.our_load_model(self.model_dir_of_binarization) self.model_textline = self.our_load_model(self.model_textline_dir) self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) + self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) + self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) @@ -1069,6 +1073,66 @@ def early_page_for_num_of_column_classification(self,img_bin): croped_page, page_coord = crop_image_inside_box(box, img) return croped_page, page_coord + def extract_text_regions_new(self, img, patches, cols): + self.logger.debug("enter extract_text_regions") + img_height_h = img.shape[0] + img_width_h = img.shape[1] + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_new if patches else self.model_region_dir_fully_np) + else: + model_region = self.model_region_fl_new if patches else self.model_region_fl_np + + if not patches: + img = 
otsu_copy_binary(img) + img = img.astype(np.uint8) + prediction_regions2 = None + else: + if cols == 1: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + + img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000) + img = img.astype(np.uint8) + + if cols == 2: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300) + img = img.astype(np.uint8) + + if cols == 3: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600) + img = img.astype(np.uint8) + + if cols == 4: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900) + img = img.astype(np.uint8) + + if cols == 5: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200) + img = img.astype(np.uint8) + + if cols >= 6: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500) + img = img.astype(np.uint8) + + marginal_of_patch_percent = 0.1 + + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + + prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) + self.logger.debug("exit extract_text_regions") + return prediction_regions, prediction_regions + + def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] @@ -1652,10 +1716,17 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): textline_mask_tot_ea = self.run_textline(img_bin) if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + if num_col_classifier == 1 or num_col_classifier == 2: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + else: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + if num_col_classifier == 1 or num_col_classifier == 2: + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + else: + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2828,24 +2899,32 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 image_page = image_page.astype(np.uint8) - - regions_fully, regions_fully_only_drop = self.extract_text_regions(image_page, True, cols=num_col_classifier) - text_regions_p[:,:][regions_fully[:,:,0]==6]=6 - regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) - regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 + + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) + + # 6 is the separators lable in old full 
layout model + # 4 is the drop capital class in old full layout model + # in the new full layout drop capital is 3 and separators are 5 + + text_regions_p[:,:][regions_fully[:,:,0]==5]=6 + regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 + + #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 + #regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) + #regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) - regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) - if num_col_classifier > 2: - regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 - else: - regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) + ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) + ##if num_col_classifier > 2: + ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 + ##else: + ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) - regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) + ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4 - text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 + ####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 #plt.imshow(text_regions_p) #plt.show() ####if not self.tables: @@ -3645,8 +3724,13 @@ def run(self): else: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) + + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index c69be9b..29caddc 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -208,7 +208,7 @@ def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, o return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, 
all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -225,14 +225,24 @@ def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_t textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord))) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter) + + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): textregion = TextRegionType(id=counter.next_region_id, type_='header', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter) + + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', From be144db9f83fbdd0bd345b89f5634b419e0fd919 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 18:13:10 +0200 Subject: [PATCH 08/64] updating 1&2 columns images + full layout --- qurator/eynollah/eynollah.py | 143 +++++++++++++++++++++-------- qurator/eynollah/utils/__init__.py | 14 ++- 2 files changed, 115 insertions(+), 42 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8032f1e..54e6e3b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1083,43 +1083,64 @@ def extract_text_regions_new(self, img, patches, cols): model_region = self.model_region_fl_new if patches else self.model_region_fl_np if not patches: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) prediction_regions2 = None else: if cols == 1: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000) img = img.astype(np.uint8) if cols == 2: - img = otsu_copy_binary(img) + if 
self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300) img = img.astype(np.uint8) if cols == 3: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600) img = img.astype(np.uint8) if cols == 4: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900) img = img.astype(np.uint8) if cols == 5: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200) img = img.astype(np.uint8) if cols >= 6: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500) img = img.astype(np.uint8) @@ -1611,6 +1632,7 @@ def textline_contours(self, img, patches, scaler_h, scaler_w): img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) + #print(img.shape,'bin shape') if not self.dir_in: prediction_textline = self.do_prediction(patches, img, model_textline) else: @@ -1664,6 +1686,7 @@ def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_t box_sub.put(boxes_sub_new) def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_light_v") + t_in = time.time() erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] @@ -1671,7 +1694,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) - + #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: img_w_new = 1000 @@ -1711,9 +1734,12 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): #img= np.copy(prediction_bin) img_bin = np.copy(prediction_bin) - + #print("inside 1 ", time.time()-t_in) textline_mask_tot_ea = self.run_textline(img_bin) + + + #print("inside 2 ", time.time()-t_in) if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: @@ -1727,12 +1753,14 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - + + #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) + img_bin = resize_image(img_bin,img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] @@ -1787,8 +1815,8 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - - return text_regions_p_true, erosion_hurts, polygons_lines_xml, 
textline_mask_tot_ea + #print("inside 4 ", time.time()-t_in) + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") @@ -2553,7 +2581,11 @@ def get_tables_from_model(self, img, num_col_classifier): prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) - def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts): + def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): + + #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') + #print(erosion_hurts, 'erosion_hurts') + t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) @@ -2563,7 +2595,7 @@ def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() - + #print("inside graphics 1 ", time.time() - t_in_gr) if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: @@ -2574,6 +2606,9 @@ def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) @@ -2582,7 +2617,7 @@ def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - + #print("inside graphics 2 ", time.time() - t_in_gr) if erosion_hurts: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: @@ -2600,8 +2635,10 @@ def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, except Exception as why: self.logger.error(why) num_col = None - return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea + #print("inside graphics 3 ", time.time() - t_in_gr) + return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): + t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) @@ -2629,13 +2666,11 @@ def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_col img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - if 
erosion_hurts: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) - try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2682,6 +2717,7 @@ def run_textline(self, image_page): return textline_mask_tot_ea def run_deskew(self, textline_mask_tot_ea): + #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') sigma = 2 main_page_deskew = True slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) @@ -2805,7 +2841,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables - def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts): + def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') if self.tables: @@ -2900,20 +2936,23 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s image_page = image_page.astype(np.uint8) - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) + if self.light_version: + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) + else: + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 text_regions_p[:,:][regions_fully[:,:,0]==5]=6 - regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 + ###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 - #regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) - #regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 - - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) + ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) + ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 + drop_capital_label_in_full_layout_model = 3 + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -2923,7 +2962,7 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() - text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4 + text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4 
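# --- Editor's sketch, not part of the patch: the hunk above translates the
# new full-layout model's classes (drop capital = 3, separator = 5) into the
# legacy labels (4 and 6) that the rest of the pipeline still expects, so no
# downstream code has to change. A minimal, runnable restatement of that
# mapping; the constant and function names are illustrative, not from the
# codebase.
import numpy as np

NEW_SEPARATOR, NEW_DROP_CAPITAL = 5, 3   # classes emitted by the new model
OLD_SEPARATOR, OLD_DROP_CAPITAL = 6, 4   # classes assumed by older code

def translate_full_layout_labels(text_regions_p, regions_fully):
    # separators are written straight into the page-level label map
    text_regions_p[regions_fully[:, :, 0] == NEW_SEPARATOR] = OLD_SEPARATOR
    # drop capitals keep the new label through the bounding-box step above,
    # then land in the page map under the legacy label
    text_regions_p[regions_fully[:, :, 0] == NEW_DROP_CAPITAL] = OLD_DROP_CAPITAL
    return text_regions_p
# --- end editor's sketch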
####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 #plt.imshow(text_regions_p) #plt.show() @@ -3463,22 +3502,41 @@ def run(self): self.ls_imgs = [1] for img_name in self.ls_imgs: + print(img_name) t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - + #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) + + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 1300 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -3498,7 +3556,7 @@ def run(self): continue else: return pcgts - + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -3513,17 +3571,20 @@ def run(self): textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, 
text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) if self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts) + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - + #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -3625,13 +3686,16 @@ def run(self): # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) else: pass + + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - + #print("text region early 5 in %.1fs", time.time() - t0) if not self.curved_line: if self.light_version: if self.textline_light: @@ -3651,7 +3715,7 @@ def run(self): all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - + #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) @@ -3778,7 +3842,10 @@ def run(self): #print(x, y, w, h, h/float(w),'ratio') h2w_ratio = h/float(w) mask_poly = np.zeros(image_page.shape) - img_poly_on_img = np.copy(image_page) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) @@ -3805,8 
+3872,10 @@ def run(self): pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index d2b2488..929669f 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -775,9 +775,8 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): return layout_no_patch -def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): - - drop_only = (layout_in_patch[:, :, 0] == 4) * 1 +def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label): + drop_only = (layout_in_patch[:, :, 0] == drop_capital_label) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -786,13 +785,18 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.00001] - areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.001] + areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.00001] contours_drop_parent_final = [] for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) - layout_in_patch[y : y + h, x : x + w, 0] = 4 + + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: + + layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label + else: + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label return layout_in_patch From 00bf2b64d016df86810ec2eed5799799c7a13fbd Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 19:07:54 +0200 Subject: [PATCH 09/64] 1&2 column images only printspace --- qurator/eynollah/eynollah.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 54e6e3b..3f078b0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3549,7 +3549,8 @@ def run(self): if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], []) + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t1) if self.dir_in: self.writer.write_pagexml(pcgts) From e97677879638816ee12d0e1840b41e3e021ea9b2 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 14:33:01 +0200 Subject: [PATCH 10/64] testing 
pyproject.toml --- pyproject.toml | 30 ++++++++++++++++ qurator/eynollah/cli.py | 80 +++++++++++++++++++++++++---------------- requirements.txt | 8 ----- setup.py | 28 --------------- 4 files changed, 80 insertions(+), 66 deletions(-) create mode 100644 pyproject.toml delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..102f443 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "eynollah" +version = "1.2.3" + + + + +dependencies = [ + "ocrd >= 2.23.3", + "tensorflow >= 2.12.0", + "scikit-learn >= 0.23.2", + "imutils >= 0.5.3", + "numpy < 1.24.0", + "matplotlib", + "torch == 2.0.1", + "transformers == 4.30.2", + "numba == 0.58.1", +] + +[project.scripts] +eynollah = "qurator.eynollah.cli:main" + + +[tool.setuptools.packages.find] +where = ["."] +include = ["qurator"] diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 833e904..6c6561f 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -3,14 +3,60 @@ from ocrd_utils import initLogging, setOverrideLogLevel from qurator.eynollah.eynollah import Eynollah +@click.group() +def main(): + pass -@click.command() +@main.command() +@click.option( + "--dir_xml", + "-dx", + help="directory of GT page-xml files", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--dir_out_modal_image", + "-domi", + help="directory where ground truth images would be written", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--dir_out_classes", + "-docl", + help="directory where ground truth classes would be written", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--input_height", + "-ih", + help="input height", +) +@click.option( + "--input_width", + "-iw", + help="input width", +) +@click.option( + "--min_area_size", + "-min", + help="min area size of regions considered for reading order training.", +) + +def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): + xml_files_ind = os.listdir(dir_xml) + + +@main.command() @click.option( "--image", "-i", help="image filename", type=click.Path(exists=True, dir_okay=False), ) + @click.option( "--out", "-o", @@ -146,37 +192,13 @@ help="if this parameter set to true, this tool will try to do ocr", ) @click.option( - "--log-level", + "--log_level", "-l", type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) -def main( - image, - out, - dir_in, - model, - save_images, - save_layout, - save_deskewed, - save_all, - save_page, - enable_plotting, - allow_enhancement, - curved_line, - textline_light, - full_layout, - tables, - right2left, - input_binary, - allow_scaling, - headers_off, - light_version, - reading_order_machine_based, - do_ocr, - ignore_page_extraction, - log_level -): + +def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, ignore_page_extraction, log_level): if log_level: setOverrideLogLevel(log_level) initLogging() @@ -215,8 +237,6 @@ def main( do_ocr=do_ocr, ) eynollah.run() - #pcgts = eynollah.run() - ##eynollah.writer.write_pagexml(pcgts) if __name__ == 
"__main__": main() diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 530dac2..0000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# ocrd includes opencv, numpy, shapely, click -ocrd >= 2.23.3 -numpy <1.24.0 -scikit-learn >= 0.23.2 -tensorflow >=2.12.0 -imutils >= 0.5.3 -matplotlib -setuptools >= 50 diff --git a/setup.py b/setup.py deleted file mode 100644 index 9abf158..0000000 --- a/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -from setuptools import setup, find_packages -from json import load - -install_requires = open('requirements.txt').read().split('\n') -with open('ocrd-tool.json', 'r', encoding='utf-8') as f: - version = load(f)['version'] - -setup( - name='eynollah', - version=version, - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Vahid Rezanezhad', - url='https://github.com/qurator-spk/eynollah', - license='Apache License 2.0', - namespace_packages=['qurator'], - packages=find_packages(exclude=['tests']), - install_requires=install_requires, - package_data={ - '': ['*.json'] - }, - entry_points={ - 'console_scripts': [ - 'eynollah=qurator.eynollah.cli:main', - 'ocrd-eynollah-segment=qurator.eynollah.ocrd_cli:main', - ] - }, -) From 53fd5fb2a5da9a4c42bd1964a3ed1d2427f8637e Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 14:42:37 +0200 Subject: [PATCH 11/64] resolving #106 for pyproject.toml test --- qurator/eynollah/cli.py | 6 +++++- qurator/eynollah/eynollah.py | 9 ++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 6c6561f..b0f55cd 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -236,7 +236,11 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s reading_order_machine_based=reading_order_machine_based, do_ocr=do_ocr, ) - eynollah.run() + if dir_in: + eynollah.run() + else: + pcgts = eynollah.run() + eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": main() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 3f078b0..b27d269 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3797,7 +3797,8 @@ def run(self): pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + if not self.dir_in: + return pcgts else: @@ -3872,9 +3873,11 @@ def run(self): self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + if not self.dir_in: + return pcgts #print("text region early 7 in %.1fs", time.time() - t0) - self.writer.write_pagexml(pcgts) + if self.dir_in: + 
self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) #print("Job done in %.1fs", time.time() - t0) From 4c50479cb87cf6abf29f1ce8f907eb6814eedec0 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 15:28:36 +0200 Subject: [PATCH 12/64] pyproject.toml may work for ocrd --- pyproject.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 102f443..c76f7e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "eynollah" -version = "1.2.3" +version = "0.1.0" @@ -23,8 +23,12 @@ dependencies = [ [project.scripts] eynollah = "qurator.eynollah.cli:main" +ocrd-eynollah-segment="qurator.eynollah.ocrd_cli:main" [tool.setuptools.packages.find] where = ["."] include = ["qurator"] + +[tool.setuptools.package-data] +"*" = ["*.json", '*.yml', '*.xml', '*.xsd'] From 74eac4daccd7e5bd9dc5644dc01ad54671671a10 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 15 Aug 2024 13:50:36 +0200 Subject: [PATCH 13/64] dtype = object in the case of length 1 arise error --- qurator/eynollah/eynollah.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b27d269..b4e7276 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3599,7 +3599,10 @@ def run(self): contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + if len(contours_only_text_parent)>1: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + else: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) @@ -3614,7 +3617,10 @@ def run(self): if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + if len(contours_only_text_parent_d)>1: + contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + else: + contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) @@ -3677,7 +3683,10 @@ def run(self): areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + if len(contours_only_text_parent)>1: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + else: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) 
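# --- Editor's sketch, not part of the patch: the dtype guard above recurs in
# several hunks of this commit. np.argsort reorders contours by ascending
# area, but, per the commit message, building an object-dtype array from a
# length-1 contour list raises an error, hence the np.int32 fallback. A
# hypothetical helper expressing the pattern once:
import numpy as np

def sort_contours_by_area(contours, areas):
    order = np.argsort(areas)
    dtype = object if len(contours) > 1 else np.int32
    contours_sorted = list(np.array(contours, dtype=dtype)[order])
    areas_sorted = list(np.array(areas)[order])
    return contours_sorted, areas_sorted

# usage, mirroring the hunk above:
#   contours_only_text_parent, areas_cnt_text_parent = sort_contours_by_area(
#       contours_only_text_parent, areas_cnt_text_parent)
# --- end editor's sketch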
areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) @@ -3719,7 +3728,10 @@ def run(self): #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if len(contours_only_text_parent_d_ordered)>1: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3809,7 +3821,10 @@ def run(self): if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if len(contours_only_text_parent_d_ordered)>1: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) From 6f4205ba49e66ad99b1c18a95533d71447625faf Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 15 Aug 2024 16:08:45 +0200 Subject: [PATCH 14/64] update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c76f7e7..67544bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ version = "0.1.0" dependencies = [ "ocrd >= 2.23.3", - "tensorflow >= 2.12.0", + "tensorflow == 2.12.1", "scikit-learn >= 0.23.2", "imutils >= 0.5.3", "numpy < 1.24.0", From 4f8210de71935f9980c121f5eaae4df2722903d7 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 15 Aug 2024 23:23:48 +0200 Subject: [PATCH 15/64] update Makefile model location --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 525e6c3..440b0bd 100644 --- a/Makefile +++ b/Makefile @@ -24,12 +24,14 @@ models: models_eynollah models_eynollah: models_eynollah.tar.gz # tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' # tar xf models_eynollah_renamed.tar.gz - tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' + # tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' + tar xf models_eynollah.tar.gz models_eynollah.tar.gz: # wget 
'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' - wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + wget https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz # Install with pip install: From c10a525675690076c1d029a483c0ff997c0c0e17 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 23 Aug 2024 02:18:16 +0200 Subject: [PATCH 16/64] inference with batch size bigger than 1 --- qurator/eynollah/eynollah.py | 172 ++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 72 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b4e7276..2bf57a4 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -548,11 +548,11 @@ def resize_and_enhance_image_with_column_classifier(self,light_version): if self.input_binary: img = self.imread() if self.dir_in: - prediction_bin = self.do_prediction(True, img, self.model_bin) + prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) else: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img, model_bin) + prediction_bin = self.do_prediction(True, img, model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 @@ -703,7 +703,7 @@ def start_new_session_and_model(self, model_dir): return model, None - def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -745,7 +745,17 @@ def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): if i == 0: @@ -766,59 +776,77 @@ def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - - if i == 0 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : 
seg.shape[0] - 0, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i != 0 and i != nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + if batch_indexer == n_batch_inference: + + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) 
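                    # --- Editor's note, not part of the patch: the nine
                    # i_batch/j_batch cases below all apply one rule -- crop
                    # the overlap margin from every patch edge that faces a
                    # neighbouring patch, and keep the full extent on edges
                    # touching the image border. Assuming nxf > 1 and
                    # nyf > 1, a compact equivalent would be:
                    #
                    #   top    = 0 if j_batch == 0 else margin
                    #   bottom = 0 if j_batch == nyf - 1 else margin
                    #   left   = 0 if i_batch == 0 else margin
                    #   right  = 0 if i_batch == nxf - 1 else margin
                    #   prediction_true[index_y_d_in + top : index_y_u_in - bottom,
                    #                   index_x_d_in + left : index_x_u_in - right, :] = \
                    #       seg_color[top : seg_color.shape[0] - bottom,
                    #                 left : seg_color.shape[1] - right, :]
                    # --- end editor's note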
+ + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() @@ -835,7 +863,7 @@ def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_perce img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] @@ -1147,7 +1175,7 @@ def extract_text_regions_new(self, img, patches, cols): marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + 
prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1173,7 +1201,7 @@ def extract_text_regions(self, img, patches, cols): img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.7), int(img_width_h * 0.7)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) if cols == 2: @@ -1181,7 +1209,7 @@ def extract_text_regions(self, img, patches, cols): img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.4), int(img_width_h * 0.4)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) elif cols > 2: @@ -1189,7 +1217,7 @@ def extract_text_regions(self, img, patches, cols): img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.3), int(img_width_h * 0.3)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) if cols == 2: @@ -1245,7 +1273,7 @@ def extract_text_regions(self, img, patches, cols): img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 @@ -1634,9 +1662,9 @@ def textline_contours(self, img, patches, scaler_h, scaler_w): img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) #print(img.shape,'bin shape') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline) + prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=4) else: - prediction_textline = self.do_prediction(patches, img, self.model_textline) + prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=4) prediction_textline = resize_image(prediction_textline, img_h, img_w) if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1721,9 +1749,9 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: - prediction_bin = 
self.do_prediction(True, img_resized, self.model_bin) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -1870,9 +1898,9 @@ def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) if self.dir_in: - prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, 0.2) + prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) else: - prediction_regions_org2 = self.do_prediction(True, img, model_region, 0.2) + prediction_regions_org2 = self.do_prediction(True, img, model_region, marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) @@ -1905,9 +1933,9 @@ def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): else: if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin) + prediction_bin = self.do_prediction(True, img_org, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -1958,9 +1986,9 @@ def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin) + prediction_bin = self.do_prediction(True, img_org, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] From 04e79002b3daa3f4e69921e6b94b3d0a6ee48639 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 24 Aug 2024 12:54:19 +0200 Subject: [PATCH 17/64] making light version faster for 1 and 2 columns images --- qurator/eynollah/eynollah.py | 88 ++++++++++++++++++------ qurator/eynollah/utils/separate_lines.py | 16 ++--- 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2bf57a4..640db16 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -28,6 +28,7 @@ from scipy.ndimage import gaussian_filter1d os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' stderr = sys.stderr sys.stderr = open(os.devnull, "w") import tensorflow as tf @@ -299,17 +300,25 @@ def __init__( def _cache_images(self, image_filename=None, image_pil=None): ret = {} + t_c0 = time.time() if image_filename: ret['img'] = cv2.imread(image_filename) - self.dpi = check_dpi(image_filename) + if self.light_version: + self.dpi = 100 + else: + self.dpi = check_dpi(image_filename) else: ret['img'] = pil2cv(image_pil) - self.dpi = check_dpi(image_pil) + if self.light_version: + self.dpi = 100 + else: + self.dpi = check_dpi(image_pil) ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) for prefix in ('', 
'_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret def reset_file_name_dir(self, image_filename): + t_c = time.time() self._imgs = self._cache_images(image_filename=image_filename) self.image_filename = image_filename @@ -491,6 +500,27 @@ def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_p num_column_is_classified = True return img_new, num_column_is_classified + + def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns") + if num_col == 1: + img_w_new = 1300 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300) + else: + img_w_new = 1500 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1500) + + if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: + img_new = np.copy(img) + num_column_is_classified = False + elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + img_new = np.copy(img) + num_column_is_classified = False + else: + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified def resize_image_with_column_classifier(self, is_image_enhanced, img_bin): self.logger.debug("enter resize_image_with_column_classifier") @@ -600,16 +630,24 @@ def resize_and_enhance_image_with_column_classifier(self,light_version): self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) if dpi < DPI_THRESHOLD: - img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + else: + img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) if light_version: image_res = np.copy(img_new) else: image_res = self.predict_enhancement(img_new) is_image_enhanced = True else: - num_column_is_classified = True - image_res = np.copy(img) - is_image_enhanced = False + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + image_res = np.copy(img_new) + is_image_enhanced = True + else: + num_column_is_classified = True + image_res = np.copy(img) + is_image_enhanced = False self.logger.debug("exit resize_and_enhance_image_with_column_classifier") return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin @@ -1175,7 +1213,7 @@ def extract_text_regions_new(self, img, patches, cols): marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=4) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1280,7 +1318,10 @@ def extract_text_regions(self, img, patches, cols): def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") - num_cores = cpu_count() + if len(contours)>15: + num_cores = cpu_count() + else: + num_cores = 1 queue_of_all_params = Queue() processes = [] @@ -1554,8 +1595,6 
@@ def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_ mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) - # plt.imshow(mask_only_con_region) - # plt.show() if self.textline_light: all_text_region_raw = np.copy(textline_mask_tot_ea) @@ -1660,11 +1699,11 @@ def textline_contours(self, img, patches, scaler_h, scaler_w): img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - #print(img.shape,'bin shape') + #print(img.shape,'bin shape textline') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=4) + prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=3) else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=4) + prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=3) prediction_textline = resize_image(prediction_textline, img_h, img_w) if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1747,11 +1786,14 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) + t_bin = time.time() if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=10) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=10) + + #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2710,10 +2752,10 @@ def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_col return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction def run_enhancement(self,light_version): + t_in = time.time() self.logger.info("Resizing and enhancing image...") is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') - scale = 1 if is_image_enhanced: if self.allow_enhancement: @@ -2731,6 +2773,7 @@ def run_enhancement(self,light_version): if self.allow_scaling: img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) + #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified def run_textline(self, image_page): @@ -2748,7 +2791,8 @@ def run_deskew(self, textline_mask_tot_ea): #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') sigma = 2 main_page_deskew = True - slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) + 
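Just below, run_deskew starts passing an explicit angle budget (n_total_angles = 30) into return_deskew_slop. The search it feeds rates every candidate angle by how sharply the horizontal projection of the rotated textline mask separates into rows: on a correctly deskewed page the row sums form a spiky, high-variance profile. A self-contained sketch of that scoring criterion, with helper names of my own (eynollah scores a smoothed profile in a closely analogous way, which ranks angles the same):

import numpy as np
from scipy.ndimage import gaussian_filter1d, rotate

def angle_score(mask, angle, sigma=2):
    # Rotate the binary textline mask, sum each row, smooth, and score by
    # variance: dense text rows separated by empty gaps give a high score.
    rotated = rotate(mask, angle, reshape=True, order=0)
    profile = gaussian_filter1d(rotated.sum(axis=1).astype(float), sigma)
    return float(np.var(profile))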
n_total_angles = 30 + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, n_total_angles, main_page_deskew, plotter=self.plotter) slope_first = 0 if self.plotter: @@ -2871,7 +2915,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') - + t_full0 = time.time() if self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) @@ -2963,12 +3007,12 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 image_page = image_page.astype(np.uint8) - + #print("full inside 1", time.time()- t_full0) if self.light_version: regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) else: regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) - + #print("full inside 2", time.time()- t_full0) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 @@ -3012,6 +3056,7 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') + #print("full inside 3", time.time()- t_full0) return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables def our_load_model(self, model_file): @@ -3534,6 +3579,7 @@ def run(self): t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) + #print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) @@ -3922,7 +3968,7 @@ def run(self): if self.dir_in: self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) - #print("Job done in %.1fs", time.time() - t0) + print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/utils/separate_lines.py b/qurator/eynollah/utils/separate_lines.py index acdc2e9..1004a92 100644 --- a/qurator/eynollah/utils/separate_lines.py +++ b/qurator/eynollah/utils/separate_lines.py @@ -1569,7 +1569,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1626,7 +1626,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): ang_int=0 - 
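The hunks below thread n_tot_angles through every np.linspace call in return_deskew_slop, so the light version can sample 30 candidate angles where the default samples 100. The function's overall strategy is a coarse-to-fine sweep: score a window of angles, keep the argmax, then rescan a narrower window around it. A minimal sketch of that loop, reusing the angle_score helper sketched above (assumption: mask is a binary numpy array):

import numpy as np

def best_angle(mask, center, half_range, n_angles, sigma=2):
    angles = np.linspace(center - half_range, center + half_range, n_angles)
    scores = [angle_score(mask, a, sigma) for a in angles]
    return float(angles[int(np.argmax(scores))])

# Coarse pass over a wide window, then a finer pass around the winner,
# in the spirit of the +/-22.5 deg and +/-12 deg windows in the hunks below.
coarse = best_angle(mask, 0.0, 22.5, n_angles=30)
fine   = best_angle(mask, coarse, 12.0, n_angles=30)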
angels=np.linspace(ang_int-22.5,ang_int+22.5,100) + angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) var_res=[] for rot in angels: @@ -1649,7 +1649,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): #plt.imshow(img_resized) #plt.show() - angels=np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) + angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) var_res=[] @@ -1680,7 +1680,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): early_slope_edge=11 if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-12,100) + angels=np.linspace(-90,-12,n_tot_angles) var_res=[] for rot in angels: img_rot=rotate_image(img_resized,rot) @@ -1700,7 +1700,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): elif abs(ang_int)>early_slope_edge and ang_int>0: - angels=np.linspace(90,12,100) + angels=np.linspace(90,12,n_tot_angles) var_res=[] for rot in angels: img_rot=rotate_image(img_resized,rot) @@ -1719,7 +1719,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): except: ang_int=0 else: - angels=np.linspace(-25,25,60) + angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) var_res=[] indexer=0 for rot in angels: @@ -1749,7 +1749,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): early_slope_edge=22 if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-25,60) + angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) var_res=[] @@ -1772,7 +1772,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): elif abs(ang_int)>early_slope_edge and ang_int>0: - angels=np.linspace(90,25,60) + angels=np.linspace(90,25,int(n_tot_angles/2.)+10) var_res=[] From 7ae6a8776fb3cddc9279680f40fc23bc9b4df946 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 26 Aug 2024 16:02:10 +0200 Subject: [PATCH 18/64] ignoring dpi check by light version --- qurator/eynollah/eynollah.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 640db16..ff35d6f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -504,11 +504,11 @@ def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_p def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): self.logger.debug("enter calculate_width_height_by_columns") if num_col == 1: + img_w_new = 1000 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1000) + else: img_w_new = 1300 img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300) - else: - img_w_new = 1500 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 1500) if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) @@ -1213,7 +1213,7 @@ def extract_text_regions_new(self, img, patches, cols): marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=4) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1810,7 +1810,8 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): #print("inside 2 ", time.time()-t_in) - + + 
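Commit 18 pairs the skipped DPI check with revised width targets in calculate_width_height_by_columns_1_2: rather than trusting a DPI estimate, every 1- or 2-column page is scaled to a fixed working width (now 1000 px and 1300 px) and the height follows from the aspect ratio. The arithmetic, with constants taken from the hunk above and a helper name of my own:

def normalised_size(h, w, num_col):
    # 1000 px working width for single-column pages, 1300 px for two columns;
    # height is derived so the aspect ratio is preserved.
    target_w = 1000 if num_col == 1 else 1300
    return int(h / float(w) * target_w), target_w

print(normalised_size(3508, 2480, 2))  # 300 dpi A4 scan -> (1838, 1300)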
#print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) From 93005959e54abf5f67def79868b8fd8d8831e287 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 27 Aug 2024 18:13:46 +0200 Subject: [PATCH 19/64] inference batch size debugged --- qurator/eynollah/eynollah.py | 71 +++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index ff35d6f..f183dee 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -89,7 +89,7 @@ from .plot import EynollahPlotter from .writer import EynollahXmlWriter -MIN_AREA_REGION = 0.0005 +MIN_AREA_REGION = 0.00001 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -182,6 +182,7 @@ def __init__( logger=None, pcgts=None, ): + self.light_version = light_version if not dir_in: if image_pil: self._imgs = self._cache_images(image_pil=image_pil) @@ -209,7 +210,6 @@ def __init__( self.input_binary = input_binary self.allow_scaling = allow_scaling self.headers_off = headers_off - self.light_version = light_version self.ignore_page_extraction = ignore_page_extraction self.ocr = do_ocr self.pcgts = pcgts @@ -828,7 +828,64 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa batch_indexer = batch_indexer + 1 if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : 
index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) @@ -885,6 +942,7 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa batch_indexer = 0 img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() @@ -1789,9 +1847,9 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): t_bin = time.time() if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=10) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=10) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] @@ -1808,7 +1866,6 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): textline_mask_tot_ea = self.run_textline(img_bin) - #print("inside 2 ", time.time()-t_in) #print(img_resized.shape, num_col_classifier, "num_col_classifier") @@ -1839,6 +1896,10 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + mask_texts_only = mask_texts_only.astype('uint8') + + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) From 0f87974b0c7a7bdfddd31ffa99b89c58c952ddcf Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 2 Sep 2024 16:21:07 +0200 Subject: [PATCH 20/64] writing drop capitals in xml output + and may resolve issue #110 --- qurator/eynollah/eynollah.py | 23 ++++++++++++----------- qurator/eynollah/writer.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index f183dee..1bb0eff 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3735,9 +3735,9 @@ def run(self): contours_only_text_parent = [c for jz, c in 
enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - if len(contours_only_text_parent)>1: + try: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - else: + except: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -3753,10 +3753,11 @@ def run(self): if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - if len(contours_only_text_parent_d)>1: + try: contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - else: + except: contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) @@ -3819,9 +3820,9 @@ def run(self): areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - if len(contours_only_text_parent)>1: + try: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - else: + except: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -3864,10 +3865,10 @@ def run(self): #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - if len(contours_only_text_parent_d_ordered)>1: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: + try: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + except: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3957,9 +3958,9 @@ def run(self): if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - if len(contours_only_text_parent_d_ordered)>1: + try: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: + except: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, 
boxes_d, textline_mask_tot_d) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index 29caddc..8eb1027 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -136,6 +136,29 @@ def serialize_lines_in_region(self, text_region, all_found_textline_polygons, re points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y)) points_co += ' ' coords.set_points(points_co[:-1]) + + def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): + self.logger.debug('enter serialize_lines_in_region') + for j in range(1): + coords = CoordsType() + textline = TextLineType(id=counter.next_line_id, Coords=coords) + if ocr_all_textlines_textregion: + textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) + text_region.add_TextLine(textline) + #region_bboxes = all_box_coord[region_idx] + points_co = '' + for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[j]): + if len(contour_textline) == 2: + points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) + points_co += ',' + points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y)) + else: + points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) + points_co += ',' + points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) + + points_co += ' ' + coords.set_points(points_co[:-1]) def write_pagexml(self, pcgts): out_fname = os.path.join(self.dir_out, self.image_filename_stem) + ".xml" @@ -251,8 +274,12 @@ def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_t self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) for mm in range(len(found_polygons_drop_capitals)): - page.add_TextRegion(TextRegionType(id=counter.next_region_id, type_='drop-capital', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))) + dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))) + page.add_TextRegion(dropcapital) + all_box_coord_drop = None + slopes_drop = None + self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) for mm in range(len(found_polygons_text_region_img)): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) From c3a4a1bba77d40b9be8926483e40a1ccefe42198 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 3 Sep 2024 13:14:10 +0200 Subject: [PATCH 21/64] resolving issue #110 in a better way --- qurator/eynollah/eynollah.py | 61 ++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 1bb0eff..c88f0f9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2357,7 +2357,6 @@ def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): - print(cx_text_only[ii],cy_text_only[ii],'markaz') if 
cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) break @@ -3624,6 +3623,9 @@ def return_textline_contour_with_added_box_coordinate(self, textline_contour, b textline_contour[:,0] = textline_contour[:,0] + box_ind[2] textline_contour[:,1] = textline_contour[:,1] + box_ind[0] return textline_contour + def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): + return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] + def run(self): """ @@ -3735,11 +3737,15 @@ def run(self): contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - try: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - except: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) @@ -3753,12 +3759,14 @@ def run(self): if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - try: - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - except: - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) @@ -3820,11 +3828,14 @@ def run(self): areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - try: - contours_only_text_parent = 
list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - except: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) @@ -3865,10 +3876,11 @@ def run(self): #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - try: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - except: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3958,10 +3970,11 @@ def run(self): if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - try: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - except: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) From f0b49073b7ba4746e1facd17cf8f8598e253b1d4 Mon Sep 17 00:00:00 2001 From: 
vahidrezanezhad Date: Tue, 3 Sep 2024 23:10:38 +0200 Subject: [PATCH 22/64] adding option for textline detection in printspace --- qurator/eynollah/eynollah.py | 939 +++++++++++++++++++---------------- 1 file changed, 512 insertions(+), 427 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c88f0f9..533e2a0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -741,7 +741,7 @@ def start_new_session_and_model(self, model_dir): return model, None - def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -774,7 +774,7 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin img = img / float(255.0) - img = img.astype(np.float16) + #img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] prediction_true = np.zeros((img_h, img_w, 3)) @@ -832,6 +832,23 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch,:,:] @@ -889,6 +906,22 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -1202,9 +1235,9 @@ def extract_text_regions_new(self, img, patches, cols): img_height_h = img.shape[0] img_width_h = img.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_new if patches else self.model_region_dir_fully_np) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) else: - model_region = self.model_region_fl_new if patches else self.model_region_fl_np + model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: if self.light_version: @@ -1809,7 +1842,7 @@ def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_t q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) - def 
get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): + def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_ro=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False @@ -1866,89 +1899,98 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): textline_mask_tot_ea = self.run_textline(img_bin) - #print("inside 2 ", time.time()-t_in) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - #print(img_resized.shape, num_col_classifier, "num_col_classifier") - if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier == 2: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) - else: + if not skip_layout_ro: + #print("inside 2 ", time.time()-t_in) + + #print(img_resized.shape, num_col_classifier, "num_col_classifier") + if not self.dir_in: + ###if num_col_classifier == 1 or num_col_classifier == 2: + ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + ###prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + ###else: + ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + ###prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) - else: - if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - - #print("inside 3 ", time.time()-t_in) - #plt.imshow(prediction_regions_org[:,:,0]) - #plt.show() + ##if num_col_classifier == 1 or num_col_classifier == 2: + ##prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + ##else: + ##prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - img_bin = resize_image(img_bin,img_height_h, img_width_h ) - - prediction_regions_org=prediction_regions_org[:,:,0] + #print("inside 3 ", time.time()-t_in) + #plt.imshow(prediction_regions_org[:,:,0]) + #plt.show() + + prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - - mask_texts_only = mask_texts_only.astype('uint8') - - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) - - mask_images_only=(prediction_regions_org[:,:] ==2)*1 - - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - - - test_khat = np.zeros(prediction_regions_org.shape) - - 
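The thresholding_for_some_classes_in_light_version flag added to do_prediction above post-processes the softmax instead of trusting plain argmax: pixels whose channel 4 ("not base") exceeds 0.03 are claimed by that class, a generous 0.25 background threshold then suppresses weak responses, and finally the separator-line channel 3 may reclaim pixels that ended up as background. Order matters, since each rule overwrites the previous one. A standalone sketch (probs is an H x W x C softmax map; channel indices as in the hunk):

import numpy as np

def threshold_classes(probs):
    seg = np.argmax(probs, axis=-1)
    seg[probs[..., 4] > 0.03] = 4                 # low bar for the rare class
    seg[probs[..., 0] > 0.25] = 0                 # background override
    seg[(probs[..., 3] > 0.1) & (seg == 0)] = 3   # lines reclaim background only
    return seg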
test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - - #plt.imshow(test_khat[:,:]) - #plt.show() - - #for jv in range(1): - #print(jv, hir_lines_xml[0][232][3]) - #test_khat = np.zeros(prediction_regions_org.shape) + img_bin = resize_image(img_bin,img_height_h, img_width_h ) + + prediction_regions_org=prediction_regions_org[:,:,0] + + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + + mask_texts_only = mask_texts_only.astype('uint8') - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + + mask_images_only=(prediction_regions_org[:,:] ==2)*1 + + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() + #for jv in range(1): + #print(jv, hir_lines_xml[0][232][3]) + #test_khat = np.zeros(prediction_regions_org.shape) + + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - - - test_khat = np.zeros(prediction_regions_org.shape) - - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - - #plt.imshow(test_khat[:,:]) - #plt.show() - #sys.exit() - - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - - text_regions_p_true = np.zeros(prediction_regions_org.shape) - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) - - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - #print("inside 4 ", time.time()-t_in) - return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + #sys.exit() + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) + + text_regions_p_true = np.zeros(prediction_regions_org.shape) + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) + + text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) + #print("inside 4 ", time.time()-t_in) + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin + else: + img_bin = resize_image(img_bin,img_height_h, img_width_h ) + return None, erosion_hurts, None, textline_mask_tot_ea, img_bin def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") @@ -2392,8 +2434,6 @@ def 
do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours ref_point += len(id_of_texts) order_of_texts_tot = [] - print(len(contours_only_text_parent),'contours_only_text_parent') - print(len(order_by_con_main),'order_by_con_main') for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) @@ -2768,6 +2808,28 @@ def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col = None #print("inside graphics 3 ", time.time() - t_in_gr) return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light + + def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light): + + #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') + #print(erosion_hurts, 'erosion_hurts') + t_in_gr = time.time() + img_g = self.imread(grayscale=True, uint8=True) + + img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) + img_g3 = img_g3.astype(np.uint8) + img_g3[:, :, 0] = img_g[:, :] + img_g3[:, :, 1] = img_g[:, :] + img_g3[:, :, 2] = img_g[:, :] + + image_page, page_coord, cont_page = self.extract_page() + #print("inside graphics 1 ", time.time() - t_in_gr) + + textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) @@ -3632,6 +3694,8 @@ def run(self): Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") + + skip_layout_ro = True t0_tot = time.time() @@ -3649,398 +3713,419 @@ def run(self): self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) - - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + if not skip_layout_ro: + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 1300 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + 
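The resize-then-deskew step here relies on a useful invariance: a global skew angle does not change when the page is resized, so for 1- and 2-column pages the textline mask is first shrunk to the familiar 1000/1300 px working width and only then handed to the rotation-heavy deskew search, cutting its cost roughly quadratically with the resize factor. A hedged sketch, reusing the best_angle helper from the earlier sketch:

import cv2

def deskew_downscaled(mask, target_w):
    # Estimate the angle on a small copy; the result transfers unchanged
    # to the full-resolution page.
    h = int(mask.shape[0] / float(mask.shape[1]) * target_w)
    small = cv2.resize(mask, (target_w, h), interpolation=cv2.INTER_NEAREST)
    return best_angle(small, 0.0, 22.5, n_angles=30)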
textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) + + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None 
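At this exact point the run loop bails out when the column classifier finds no text at all: it still emits a syntactically valid PAGE-XML containing only the page border, then either continues with the next image (batch mode, self.dir_in) or returns the PcGts object to the caller (single-image mode). In outline, as a function (argument list copied from the call below; writer stands in for self.writer):

def emit_empty_page(writer, page_coord, cont_page, batch_mode):
    # Every region list is empty and no OCR text lines are attached.
    pcgts = writer.build_pagexml_no_full_layout(
        [], page_coord, [], [], [], [], [], [], [], [], [], [],
        cont_page, [], [], None)
    if batch_mode:
        writer.write_pagexml(pcgts)   # dir_in: write result, caller continues
        return None
    return pcgts                      # single image: hand the result back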
- pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + #plt.imshow(table_prediction) + #plt.show() - if self.full_layout: - if not self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, 
num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + + if self.full_layout: + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - 
contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - 
contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() + else: + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] + else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - contours_biggest = 
contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) + # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + else: + pass - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - #try: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - #except: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) - else: - pass - - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - if not self.curved_line: + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - if self.textline_light: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = 
self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - else: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - else: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - else: - - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, 
regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) else: - #takes long timee - contours_only_text_parent_d_ordered = None + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text region early 5 in %.1fs", time.time() - t0) + if not self.curved_line: if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + if self.textline_light: + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + else: + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, 
image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) - pixel_lines = 6 - - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + + scale_param = 1 + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), 
num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) + + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) + pixel_lines = 6 + + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, 
matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + + if not self.reading_order_machine_based: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() - if not self.reading_order_machine_based: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + if self.full_layout: + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - - #print(boxes_d,'boxes_d') - #img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) - #for box_i in boxes_d: - #img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1 - #plt.imshow(img_once) - #plt.show() - #print(np.unique(img_once),'img_once') - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - if self.full_layout: - - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, 
id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + if self.ocr: + ocr_all_textlines = [] else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - if self.ocr: - ocr_all_textlines = [] - else: - ocr_all_textlines = None + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - - - else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, 
dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + - if self.ocr: + if self.ocr: - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + + 
ocr_textline_in_textregion.append(text_ocr) - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + ##cv2.imwrite(str(ind_tot)+'.png', img_croped) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) - ocr_textline_in_textregion.append(text_ocr) - - ##cv2.imwrite(str(ind_tot)+'.png', img_croped) - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) + else: + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_ro=skip_layout_ro) + + page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + all_found_textline_polygons=[ all_found_textline_polygons ] + order_text_new = [0] + slopes =[0] + id_of_texts_tot =['region_0001'] + + polygons_of_images = [] + slopes_marginals = [] + polygons_of_marginals = [] + all_found_textline_polygons_marginals = [] + all_box_coord_marginals = [] + polygons_lines_xml = [] + contours_tables = [] + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if not self.dir_in: return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) + if self.dir_in: self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) From 2c939049854c73c7dc27e4b04863c8498d654129 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 12 Sep 2024 17:35:28 +0200 Subject: [PATCH 23/64] avoiding double binarization --- qurator/eynollah/eynollah.py | 155 +++++++++++++++++++---------- qurator/eynollah/utils/__init__.py | 4 +- 2 files changed, 106 insertions(+), 53 
deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 533e2a0..569aec5 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -89,7 +89,7 @@ from .plot import EynollahPlotter from .writer import EynollahXmlWriter -MIN_AREA_REGION = 0.00001 +MIN_AREA_REGION = 0.000001 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -237,15 +237,16 @@ def __init__( self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" self.model_region_dir_fully_np = dir_models + "/eynollah-full-regions-1column_20210425" - self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" + #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/model_3_eraly_layout_no_patches_1_2_spaltige" - self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" + self.model_textline_dir = dir_models + "/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" if self.ocr: @@ -267,7 +268,7 @@ def __init__( self.model_textline = self.our_load_model(self.model_textline_dir) self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) - self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) + ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) @@ -993,9 +994,16 @@ def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_perce img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) - + + seg_not_base = label_p_pred[0,:,:,4] + + seg_not_base[seg_not_base>0.4] =1 + seg_not_base[seg_not_base<1] =0 seg = np.argmax(label_p_pred, axis=3)[0] + + seg[seg_not_base==1]=4 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) @@ -1781,7 +1789,7 @@ def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_t all_box_coord_per_process.append(crop_coor) 
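The `do_prediction_new_concept` hunk earlier in this patch adds a post-hoc override on top of the per-pixel argmax: wherever the model's channel 4 score exceeds 0.4, class 4 wins even if another channel scored higher. A minimal sketch of that pattern (the function name is made up for illustration; taking a boolean mask first also leaves the raw scores untouched instead of thresholding the channel in place):

    import numpy as np

    def argmax_with_override(label_p_pred, override_class=4, threshold=0.4):
        # label_p_pred: model output of shape (1, H, W, num_classes).
        seg = np.argmax(label_p_pred, axis=3)[0]
        # Boolean mask of pixels where the override class is confident enough.
        confident = label_p_pred[0, :, :, override_class] > threshold
        seg[confident] = override_class
        return seg
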
queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours])
 
-    def textline_contours(self, img, patches, scaler_h, scaler_w):
+    def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None):
         self.logger.debug('enter textline_contours')
         if not self.dir_in:
             model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np)
@@ -1792,10 +1800,34 @@ def textline_contours(self, img, patches, scaler_h, scaler_w):
         img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w))
         #print(img.shape,'bin shape textline')
         if not self.dir_in:
-            prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=3)
+            prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3)
+            if num_col_classifier==1:
+                prediction_textline_nopatch = self.do_prediction(False, img, model_textline)
+                prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0
         else:
-            prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=3)
+            prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3)
+            if num_col_classifier==1:
+                prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline)
+                prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0
         prediction_textline = resize_image(prediction_textline, img_h, img_w)
+
+        textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1
+
+        old_art = np.copy(textline_mask_tot_ea_art)
+
+        textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8')
+        textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1)
+
+        prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2
+
+        textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1
+        textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8')
+        textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1)
+
+        prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1
+
+        prediction_textline[:,:][old_art[:,:]==1]=2
+
         if not self.dir_in:
             prediction_textline_longshot = self.do_prediction(False, img, model_textline)
         else:
@@ -1855,49 +1887,58 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay
 
         #print(num_col_classifier,'num_col_classifier')
 
         if num_col_classifier == 1:
-            img_w_new = 1000
+            img_w_new = 900#1000
             img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
 
         elif num_col_classifier == 2:
-            img_w_new = 1500
+            img_w_new = 1300#1500
             img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
 
         elif num_col_classifier == 3:
-            img_w_new = 2000
+            img_w_new = 1600#2000
             img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
 
         elif num_col_classifier == 4:
-            img_w_new = 2500
+            img_w_new = 1900#2500
             img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
 
         elif num_col_classifier == 5:
-            img_w_new = 3000
+            img_w_new = 2300#3000
             img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
 
         else:
-            img_w_new = 4000
+            img_w_new = 3300#4000
             img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
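The cascade of `img_w_new` assignments in the hunk above boils down to a lookup from the column classifier's output to a working width, with the height always derived from the page's aspect ratio. Expressed as a table (helper name and structure are illustrative only; the width values are the ones introduced by this patch):

    # Working width per detected column count; multi-column pages get more pixels.
    COLUMN_WIDTHS = {1: 900, 2: 1300, 3: 1600, 4: 1900, 5: 2300}

    def target_size(img_org_shape, num_col_classifier, fallback_w=3300):
        h_org, w_org = img_org_shape[:2]
        img_w_new = COLUMN_WIDTHS.get(num_col_classifier, fallback_w)
        # Preserve the aspect ratio when rescaling for the light layout models.
        img_h_new = int(h_org / float(w_org) * img_w_new)
        return img_h_new, img_w_new
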
img_resized = resize_image(img,img_h_new, img_w_new ) t_bin = time.time() - if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - - #print("inside bin ", time.time()-t_bin) - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - prediction_bin = prediction_bin.astype(np.uint16) - #img= np.copy(prediction_bin) - img_bin = np.copy(prediction_bin) + #if (not self.input_binary) or self.full_layout: + #if self.input_binary: + #img_bin = np.copy(img_resized) + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if not self.dir_in: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + else: + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + + #print("inside bin ", time.time()-t_bin) + prediction_bin=prediction_bin[:,:,0] + prediction_bin = (prediction_bin[:,:]==0)*1 + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + prediction_bin = prediction_bin.astype(np.uint16) + #img= np.copy(prediction_bin) + img_bin = np.copy(prediction_bin) + else: + img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) - textline_mask_tot_ea = self.run_textline(img_bin) + ###textline_mask_tot_ea = self.run_textline(img_bin) + textline_mask_tot_ea = self.run_textline(img_bin, num_col_classifier) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) @@ -1906,20 +1947,20 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: - ###if num_col_classifier == 1 or num_col_classifier == 2: - ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - ###prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) - ###else: - ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - ###prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + if num_col_classifier == 1 or num_col_classifier == 2: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + else: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, 
thresholding_for_some_classes_in_light_version=True) else: - ##if num_col_classifier == 1 or num_col_classifier == 2: - ##prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) - ##else: - ##prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + if num_col_classifier == 1 or num_col_classifier == 2: + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + else: + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) @@ -1937,7 +1978,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=2) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2899,10 +2940,11 @@ def run_enhancement(self,light_version): #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified - def run_textline(self, image_page): - scaler_h_textline = 1 # 1.2#1.2 - scaler_w_textline = 1 # 0.9#1 - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline) + def run_textline(self, image_page, num_col_classifier=None): + scaler_h_textline = 1#1.3 # 1.2#1.2 + scaler_w_textline = 1#1.3 # 0.9#1 + #print(image_page.shape) + textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3147,6 +3189,17 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 drop_capital_label_in_full_layout_model = 3 + + drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1 + + drops= drops.astype(np.uint8) + + regions_fully[:,:,0][regions_fully[:,:,0]==drop_capital_label_in_full_layout_model] = 1 + + drops = cv2.erode(drops[:,:], KERNEL, iterations=1) + regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model + + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: @@ -3695,7 +3748,7 @@ def run(self): """ self.logger.debug("enter run") - skip_layout_ro = True + skip_layout_ro = False#True t0_tot = time.time() diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index 929669f..8705ecf 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -792,11 +792,11 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop for jj in range(len(contours_drop_parent)): x, y, w, h = 
cv2.boundingRect(contours_drop_parent[jj])
-            if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4:
+            if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.8:
                 layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label
             else:
-                layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label
+                layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label

     return layout_in_patch


From 1b18ae874b9ea086e99ac76281dd30572f947471 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad 
Date: Fri, 13 Sep 2024 00:52:06 +0200
Subject: [PATCH 24/64] passing number of columns as an argument

---
 qurator/eynollah/cli.py      | 14 +++++-
 qurator/eynollah/eynollah.py | 96 ++++++++++++++++++++++++++++--------
 2 files changed, 88 insertions(+), 22 deletions(-)

diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py
index b0f55cd..357582c 100644
--- a/qurator/eynollah/cli.py
+++ b/qurator/eynollah/cli.py
@@ -191,6 +191,16 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i
     is_flag=True,
     help="if this parameter set to true, this tool will try to do ocr",
 )
+@click.option(
+    "--num_col_upper",
+    "-ncu",
+    help="upper limit of columns in document image",
+)
+@click.option(
+    "--num_col_lower",
+    "-ncl",
+    help="lower limit of columns in document image",
+)
 @click.option(
     "--log_level",
     "-l",
@@ -198,7 +208,7 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i
     help="Override log level globally to this",
 )
 
-def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, ignore_page_extraction, log_level):
+def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, ignore_page_extraction, log_level):
     if log_level:
         setOverrideLogLevel(log_level)
     initLogging()
@@ -235,6 +245,8 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s
         ignore_page_extraction=ignore_page_extraction,
         reading_order_machine_based=reading_order_machine_based,
         do_ocr=do_ocr,
+        num_col_upper=num_col_upper,
+        num_col_lower=num_col_lower,
     )
     if dir_in:
         eynollah.run()
diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index 569aec5..f76dce8 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -178,6 +178,8 @@ def __init__(
         ignore_page_extraction=False,
         reading_order_machine_based=False,
         do_ocr=False,
+        num_col_upper=None,
+        num_col_lower=None,
         override_dpi=None,
         logger=None,
         pcgts=None,
@@ -212,6 +214,14 @@ def __init__(
         self.headers_off = headers_off
         self.ignore_page_extraction = ignore_page_extraction
         self.ocr = do_ocr
+        if num_col_upper:
+            self.num_col_upper = int(num_col_upper)
+        else:
+            self.num_col_upper = num_col_upper
+        if num_col_lower:
+            self.num_col_lower = int(num_col_lower)
+        else:
+            self.num_col_lower = num_col_lower
         self.pcgts = pcgts
         if not dir_in:
             self.plotter = None if not enable_plotting else
EynollahPlotter( @@ -597,36 +607,80 @@ def resize_and_enhance_image_with_column_classifier(self,light_version): else: img = self.imread() img_bin = None - + + width_early = img.shape[1] t1 = time.time() _, page_coord = self.early_page_for_num_of_column_classification(img_bin) if not self.dir_in: model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) - if self.input_binary: - img_in = np.copy(img) - width_early = img_in.shape[1] - img_in = img_in / 255.0 - img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = img_in.reshape(1, 448, 448, 3) - else: - img_1ch = self.imread(grayscale=True) - width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + if self.num_col_upper and not self.num_col_lower: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + elif self.num_col_lower and not self.num_col_upper: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + + elif (not self.num_col_upper and not self.num_col_lower): + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_1ch = img_1ch / 255.0 - img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) - img_in[0, :, :, 0] = img_1ch[:, :] - img_in[0, :, :, 1] = img_1ch[:, :] - img_in[0, :, :, 2] = img_1ch[:, :] + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] - if self.dir_in: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) + if self.dir_in: + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + else: + label_p_pred = model_num_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] + + + if self.dir_in: + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + else: + label_p_pred = model_num_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + + if num_col > self.num_col_upper: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + if num_col < self.num_col_lower: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + else: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) - num_col = np.argmax(label_p_pred[0]) + 1 + num_col = 
self.num_col_upper + label_p_pred = [np.ones(6)] + self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) From 21380fc8706474f0c6c791560fb6a5174d03aa8e Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 17 Sep 2024 15:06:41 +0200 Subject: [PATCH 25/64] scaling contours without dilation --- qurator/eynollah/eynollah.py | 207 +++++++++++++++++++++++++++++++---- 1 file changed, 184 insertions(+), 23 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index f76dce8..79cf98b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -256,7 +256,7 @@ def __init__( ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" if self.ocr: @@ -796,7 +796,7 @@ def start_new_session_and_model(self, model_dir): return model, None - def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -903,6 +903,13 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa seg[seg_not_base==1]=4 seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -977,6 +984,14 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa seg[seg_not_base==1]=4 seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -1845,42 +1860,50 @@ def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_t def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') + thresholding_for_artificial_class_in_light_version = True#False if not self.dir_in: model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) - img = img.astype(np.uint8) + #img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - #print(img.shape,'bin shape 
textline') + if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) - if num_col_classifier==1: - prediction_textline_nopatch = self.do_prediction(False, img, model_textline) - prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + + #if not thresholding_for_artificial_class_in_light_version: + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) - if num_col_classifier==1: - prediction_textline_nopatch = self.do_prediction(False, img, model_textline) - prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + #if not thresholding_for_artificial_class_in_light_version: + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 old_art = np.copy(textline_mask_tot_ea_art) - textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') - textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) - - prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 + if not thresholding_for_artificial_class_in_light_version: + textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') + textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) + + prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') - textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) + + if not thresholding_for_artificial_class_in_light_version: + textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 - prediction_textline[:,:][old_art[:,:]==1]=2 + if not thresholding_for_artificial_class_in_light_version: + prediction_textline[:,:][old_art[:,:]==1]=2 if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1959,7 +1982,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay img_w_new = 2300#3000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: - img_w_new = 3300#4000 + img_w_new = 3000#4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) @@ -1968,7 +1991,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not 
self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout):# or (not self.input_binary and num_col_classifier >= 3): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) @@ -3794,15 +3817,146 @@ def return_textline_contour_with_added_box_coordinate(self, textline_contour, b return textline_contour def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] - + + def scale_contours(self,all_found_textline_polygons): + for i in range(len(all_found_textline_polygons[0])): + con_ind = all_found_textline_polygons[0][i] + x_min = np.min( con_ind[:,0,0] ) + y_min = np.min( con_ind[:,0,1] ) + + x_max = np.max( con_ind[:,0,0] ) + y_max = np.max( con_ind[:,0,1] ) + + x_mean = np.mean( con_ind[:,0,0] ) + y_mean = np.mean( con_ind[:,0,1] ) + + arg_y_max = np.argmax( con_ind[:,0,1] ) + arg_y_min = np.argmin( con_ind[:,0,1] ) + + x_cor_y_max = con_ind[arg_y_max,0,0] + x_cor_y_min = con_ind[arg_y_min,0,0] + + m_con = (y_max - y_min) / float(x_cor_y_max - x_cor_y_min) + + con_scaled = con_ind*1 + + con_scaled = con_scaled.astype(np.float) + + con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) + con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) + + + if (x_max - x_min) > (y_max - y_min): + + if (y_max-y_min)<=15: + con_scaled[:,0,1] = con_ind[:,0,1]*1.8 + + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.8*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + elif (y_max-y_min)<=30 and (y_max-y_min)>15: + con_scaled[:,0,1] = con_ind[:,0,1]*1.6 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.6*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + elif (y_max-y_min)>30 and (y_max-y_min)<100: + con_scaled[:,0,1] = con_ind[:,0,1]*1.35 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.35*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + else: + con_scaled[:,0,1] = con_ind[:,0,1]*1.2 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.2*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + con_scaled[:,0,0] = con_ind[:,0,0]*1.03 + + + + if y_max_expected<=y_max_scaled: + con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled + + con_scaled[:,0,1] = con_scaled[:,0,1]*(y_max_expected - y_min_scaled)/ (y_max_scaled - y_min_scaled) + con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled + + else: + + if (x_max-x_min)<=15: + con_scaled[:,0,0] = con_ind[:,0,0]*1.8 + elif (x_max-x_min)<=30 and (x_max-x_min)>15: + con_scaled[:,0,0] = con_ind[:,0,0]*1.6 + elif (x_max-x_min)>30 and (x_max-x_min)<100: + con_scaled[:,0,0] = con_ind[:,0,0]*1.35 + else: + con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + + + x_min_n = np.min( con_scaled[:,0,0] ) + y_min_n = np.min( con_scaled[:,0,1] ) + + x_mean_n = np.mean( con_scaled[:,0,0] ) + y_mean_n = np.mean( con_scaled[:,0,1] ) + + ##diff_x = (x_min_n - x_min)*1 + ##diff_y = (y_min_n - y_min)*1 + + diff_x = (x_mean_n - x_mean)*1 + diff_y = (y_mean_n - y_mean)*1 + + + con_scaled[:,0,0] = (con_scaled[:,0,0] - diff_x) + con_scaled[:,0,1] = (con_scaled[:,0,1] - diff_y) + + x_max_n = np.max( 
con_scaled[:,0,0] ) + y_max_n = np.max( con_scaled[:,0,1] ) + + diff_disp_x = (x_max_n - x_max) / 2. + diff_disp_y = (y_max_n - y_max) / 2. + + x_vals = np.array( np.abs(con_scaled[:,0,0] - diff_disp_x) ).astype(np.int16) + y_vals = np.array( np.abs(con_scaled[:,0,1] - diff_disp_y) ).astype(np.int16) + all_found_textline_polygons[0][i][:,0,0] = x_vals[:] + all_found_textline_polygons[0][i][:,0,1] = y_vals[:] + return all_found_textline_polygons + + def scale_contours_new(self, textline_mask_tot_ea): + + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons1 = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + + textline_mask_tot_ea_res = resize_image(textline_mask_tot_ea, int( textline_mask_tot_ea.shape[0]*1.6), textline_mask_tot_ea.shape[1]) + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea_res) + ##all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + for i in range(len(all_found_textline_polygons)): + + #x_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,0] ) + y_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,1] ) + + #x_mean = np.mean( all_found_textline_polygons[i][:,0,0] ) + y_mean = np.mean( all_found_textline_polygons[i][:,0,1] ) + + ydiff = y_mean - y_mean_1 + + all_found_textline_polygons[i][:,0,1] = all_found_textline_polygons[i][:,0,1] - ydiff + return all_found_textline_polygons + + def run(self): """ Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - skip_layout_ro = False#True + skip_layout_ro = True t0_tot = time.time() @@ -3820,7 +3974,6 @@ def run(self): self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if not skip_layout_ro: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) @@ -4032,6 +4185,7 @@ def run(self): if self.textline_light: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + else: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) @@ -4212,10 
+4366,17 @@ def run(self): page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + + ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) all_found_textline_polygons=[ all_found_textline_polygons ] + + all_found_textline_polygons = self.scale_contours(all_found_textline_polygons) + + order_text_new = [0] slopes =[0] id_of_texts_tot =['region_0001'] From a1f1f98de3ad7500c80bb5d183fc86aa66e031e5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 18 Sep 2024 00:08:54 +0200 Subject: [PATCH 26/64] updating scaling contours --- qurator/eynollah/eynollah.py | 82 ++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 79cf98b..bbfba0f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3821,23 +3821,51 @@ def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): def scale_contours(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] - x_min = np.min( con_ind[:,0,0] ) - y_min = np.min( con_ind[:,0,1] ) - x_max = np.max( con_ind[:,0,0] ) - y_max = np.max( con_ind[:,0,1] ) + con_ind = con_ind.astype(np.float) + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) - x_mean = np.mean( con_ind[:,0,0] ) - y_mean = np.mean( con_ind[:,0,1] ) + + m_arr = y_differential / x_differential + + #print(x_differential, 'x_differential') + + #print(y_differential, 'y_differential') + + #print(m_arr) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_mean = float(np.mean( con_ind[:,0,0] )) + y_mean = float(np.mean( con_ind[:,0,1] )) arg_y_max = np.argmax( con_ind[:,0,1] ) arg_y_min = np.argmin( con_ind[:,0,1] ) - x_cor_y_max = con_ind[arg_y_max,0,0] - x_cor_y_min = con_ind[arg_y_min,0,0] - m_con = (y_max - y_min) / float(x_cor_y_max - x_cor_y_min) + arg_x_max = np.argmax( con_ind[:,0,0] ) + arg_x_min = np.argmin( con_ind[:,0,0] ) + + x_cor_y_max = float(con_ind[arg_y_max,0,0]) + x_cor_y_min = float(con_ind[arg_y_min,0,0]) + + y_cor_x_max = float(con_ind[arg_x_max,0,1]) + y_cor_x_min = float(con_ind[arg_x_min,0,1]) + + if (x_cor_y_max - x_cor_y_min) != 0: + m_con = (y_max - y_min) / (x_cor_y_max - x_cor_y_min) + else: + m_con= None + + + m_con_x = (x_max - x_min) / (y_cor_x_max - y_cor_x_min) + #print(m_con,m_con_x, 'm_con') con_scaled = con_ind*1 con_scaled = con_scaled.astype(np.float) @@ -3845,7 +3873,6 @@ def scale_contours(self,all_found_textline_polygons): con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) - if (x_max - x_min) > (y_max - y_min): if (y_max-y_min)<=15: @@ -3877,7 +3904,7 @@ def scale_contours(self,all_found_textline_polygons): - + #print(m_con, (x_cor_y_max-x_cor_y_min),y_min_scaled, y_max_expected, y_max_scaled, "y_max_scaled") if y_max_expected<=y_max_scaled: con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled @@ -3885,17 +3912,48 @@ def 
scale_contours(self,all_found_textline_polygons): con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled else: - + #print(x_max-x_min, m_con_x,'m_con_x') if (x_max-x_min)<=15: con_scaled[:,0,0] = con_ind[:,0,0]*1.8 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.8*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + elif (x_max-x_min)<=30 and (x_max-x_min)>15: con_scaled[:,0,0] = con_ind[:,0,0]*1.6 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.6*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + elif (x_max-x_min)>30 and (x_max-x_min)<100: con_scaled[:,0,0] = con_ind[:,0,0]*1.35 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.35*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + else: con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.2*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + #print(x_max_expected, x_max_scaled, "x_max_scaled") + if x_max_expected<=x_max_scaled: + con_scaled[:,0,0] = con_scaled[:,0,0] - x_min_scaled + + con_scaled[:,0,0] = con_scaled[:,0,0]*(x_max_expected - x_min_scaled)/ (x_max_scaled - x_min_scaled) + con_scaled[:,0,0] = con_scaled[:,0,0] + x_min_scaled + x_min_n = np.min( con_scaled[:,0,0] ) y_min_n = np.min( con_scaled[:,0,1] ) From 5a07cd9cfa9713e8944195fff6416ed6e639c121 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 19 Sep 2024 16:21:55 +0200 Subject: [PATCH 27/64] the most effective version of contours dilation without opencv and all at once --- qurator/eynollah/eynollah.py | 258 +++++++++++++---------------------- 1 file changed, 97 insertions(+), 161 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index bbfba0f..cb70107 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1964,7 +1964,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 900#1000 + img_w_new = 800#1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -3818,196 +3818,132 @@ def return_textline_contour_with_added_box_coordinate(self, textline_contour, b def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] - def scale_contours(self,all_found_textline_polygons): + def dilate_textlines(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] con_ind = con_ind.astype(np.float) + x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - - m_arr = y_differential / x_differential - - #print(x_differential, 'x_differential') - - #print(y_differential, 'y_differential') - - #print(m_arr) - x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) + - x_mean = float(np.mean( con_ind[:,0,0] )) - y_mean = float(np.mean( con_ind[:,0,1] )) - - arg_y_max = np.argmax( con_ind[:,0,1] ) - arg_y_min = np.argmin( con_ind[:,0,1] ) - - - arg_x_max = np.argmax( con_ind[:,0,0] ) - arg_x_min = np.argmin( con_ind[:,0,0] 
) - - x_cor_y_max = float(con_ind[arg_y_max,0,0]) - x_cor_y_min = float(con_ind[arg_y_min,0,0]) - - - y_cor_x_max = float(con_ind[arg_x_max,0,1]) - y_cor_x_min = float(con_ind[arg_x_min,0,1]) - - if (x_cor_y_max - x_cor_y_min) != 0: - m_con = (y_max - y_min) / (x_cor_y_max - x_cor_y_min) - else: - m_con= None - - - m_con_x = (x_max - x_min) / (y_cor_x_max - y_cor_x_min) - #print(m_con,m_con_x, 'm_con') - con_scaled = con_ind*1 - - con_scaled = con_scaled.astype(np.float) - - con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) - con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) - - if (x_max - x_min) > (y_max - y_min): - - if (y_max-y_min)<=15: - con_scaled[:,0,1] = con_ind[:,0,1]*1.8 - - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) + if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + + x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + + mult = x_biger_than_x*x_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if y_differential[0]==0: + y_differential[0] = 0.1 + + if y_differential[-1]==0: + y_differential[-1]= 0.1 - y_max_expected = ( m_con*1.8*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - elif (y_max-y_min)<=30 and (y_max-y_min)>15: - con_scaled[:,0,1] = con_ind[:,0,1]*1.6 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - y_max_expected = ( m_con*1.6*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - elif (y_max-y_min)>30 and (y_max-y_min)<100: - con_scaled[:,0,1] = con_ind[:,0,1]*1.35 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - y_max_expected = ( m_con*1.35*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - else: - con_scaled[:,0,1] = con_ind[:,0,1]*1.2 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - - y_max_expected = ( m_con*1.2*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - con_scaled[:,0,0] = con_ind[:,0,0]*1.03 + y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. 
for ind in range(len(y_differential)) ] - - #print(m_con, (x_cor_y_max-x_cor_y_min),y_min_scaled, y_max_expected, y_max_scaled, "y_max_scaled") - if y_max_expected<=y_max_scaled: - con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled + if y_differential[0]==0.1: + y_differential[0] = y_differential[1] + if y_differential[-1]==0.1: + y_differential[-1] = y_differential[-2] - con_scaled[:,0,1] = con_scaled[:,0,1]*(y_max_expected - y_min_scaled)/ (y_max_scaled - y_min_scaled) - con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled + y_differential.append(y_differential[0]) + + y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + + y_differential = np.array(y_differential) + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential + + con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 + con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 + + try: + con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 + con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 + con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 + + try: + con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 + con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 + except: + pass + + else: - #print(x_max-x_min, m_con_x,'m_con_x') - if (x_max-x_min)<=15: - con_scaled[:,0,0] = con_ind[:,0,0]*1.8 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.8*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - - elif (x_max-x_min)<=30 and (x_max-x_min)>15: - con_scaled[:,0,0] = con_ind[:,0,0]*1.6 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.6*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - - elif (x_max-x_min)>30 and (x_max-x_min)<100: - con_scaled[:,0,0] = con_ind[:,0,0]*1.35 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.35*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - - else: - con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + + y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) + + mult = y_biger_than_x*y_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if x_differential[0]==0: + x_differential[0] = 0.1 + + if x_differential[-1]==0: + x_differential[-1]= 0.1 - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - x_max_expected = ( m_con_x*1.2*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. 
for ind in range(len(x_differential)) ] - #print(x_max_expected, x_max_scaled, "x_max_scaled") - if x_max_expected<=x_max_scaled: - con_scaled[:,0,0] = con_scaled[:,0,0] - x_min_scaled + + if x_differential[0]==0.1: + x_differential[0] = x_differential[1] + if x_differential[-1]==0.1: + x_differential[-1] = x_differential[-2] - con_scaled[:,0,0] = con_scaled[:,0,0]*(x_max_expected - x_min_scaled)/ (x_max_scaled - x_min_scaled) - con_scaled[:,0,0] = con_scaled[:,0,0] + x_min_scaled + x_differential.append(x_differential[0]) + + x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + + x_differential = np.array(x_differential) + + con_scaled = con_ind*1 + + con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential + + con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 + con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + + con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 + con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 + + con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 + con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + + con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 + con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 - - x_min_n = np.min( con_scaled[:,0,0] ) - y_min_n = np.min( con_scaled[:,0,1] ) - - x_mean_n = np.mean( con_scaled[:,0,0] ) - y_mean_n = np.mean( con_scaled[:,0,1] ) - - ##diff_x = (x_min_n - x_min)*1 - ##diff_y = (y_min_n - y_min)*1 - - diff_x = (x_mean_n - x_mean)*1 - diff_y = (y_mean_n - y_mean)*1 - - - con_scaled[:,0,0] = (con_scaled[:,0,0] - diff_x) - con_scaled[:,0,1] = (con_scaled[:,0,1] - diff_y) - - x_max_n = np.max( con_scaled[:,0,0] ) - y_max_n = np.max( con_scaled[:,0,1] ) - - diff_disp_x = (x_max_n - x_max) / 2. - diff_disp_y = (y_max_n - y_max) / 2. 
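
What replaces the deleted scaling above is a direct polygon offset: for a mostly horizontal textline contour, the sign of the x-differential along the closed polygon distinguishes the top edge from the bottom edge, so adding a fixed pad times that sign to every y-coordinate widens the line in both directions at once (and symmetrically for near-vertical lines, where the y-differential drives an x offset). A minimal sketch of the idea, assuming an OpenCV-style contour of shape (N, 1, 2); the helper name and the fixed pad are illustrative, and the patch additionally repairs zero steps, smooths the sign runs and nudges the two endpoints:

    import numpy as np

    def pad_horizontal_textline(contour, pad=8):
        # contour: OpenCV-style (N, 1, 2) array of (x, y) vertices.
        pts = contour.astype(float)
        dx = np.diff(pts[:, 0, 0])
        dx = np.append(dx, dx[0])            # close the loop
        sign = np.where(dx < 0, -1.0, 1.0)   # -1 on one edge, +1 on the other
        out = pts.copy()
        out[:, 0, 1] += pad * sign           # push top and bottom edges apart
        out[:, 0, 1] = np.clip(out[:, 0, 1], 0, None)
        return out.astype(contour.dtype)

Which edge moves up and which moves down depends on the traversal orientation of the contour; cv2.findContours yields a consistent orientation, which is what makes the single sign rule work.
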
- - x_vals = np.array( np.abs(con_scaled[:,0,0] - diff_disp_x) ).astype(np.int16) - y_vals = np.array( np.abs(con_scaled[:,0,1] - diff_disp_y) ).astype(np.int16) - all_found_textline_polygons[0][i][:,0,0] = x_vals[:] - all_found_textline_polygons[0][i][:,0,1] = y_vals[:] - return all_found_textline_polygons - - def scale_contours_new(self, textline_mask_tot_ea): - - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons1 = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - - - textline_mask_tot_ea_res = resize_image(textline_mask_tot_ea, int( textline_mask_tot_ea.shape[0]*1.6), textline_mask_tot_ea.shape[1]) - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea_res) - ##all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - - for i in range(len(all_found_textline_polygons)): - - #x_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,0] ) - y_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,1] ) - #x_mean = np.mean( all_found_textline_polygons[i][:,0,0] ) - y_mean = np.mean( all_found_textline_polygons[i][:,0,1] ) + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - ydiff = y_mean - y_mean_1 + all_found_textline_polygons[0][i][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[0][i][:,0,0] = con_scaled[:,0, 0] - all_found_textline_polygons[i][:,0,1] = all_found_textline_polygons[i][:,0,1] - ydiff return all_found_textline_polygons - - def run(self): """ Get image and scales, then extract the page of scanned image @@ -4432,7 +4368,7 @@ def run(self): all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.scale_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) order_text_new = [0] From 2d18739d9b267a14dfe0934b02772940976a8e72 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Sep 2024 15:08:09 +0200 Subject: [PATCH 28/64] postprocessing of textline contour dilation + skip layout and reading order passed as an argument --- qurator/eynollah/cli.py | 9 +++++++- qurator/eynollah/eynollah.py | 41 ++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 357582c..b293403 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -201,6 +201,12 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i "-ncl", help="upper limit of columns in document image", ) +@click.option( + "--skip_layout_and_reading_order", + "-slro/-noslro", + is_flag=True, + help="if this parameter set to true, this tool will ignore layout detection and reading order. 
It means that textline detection will be done within printspace and contours of textline will be written in xml output file.", +) @click.option( "--log_level", "-l", @@ -208,7 +214,7 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i help="Override log level globally to this", ) -def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, ignore_page_extraction, log_level): +def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): if log_level: setOverrideLogLevel(log_level) initLogging() @@ -247,6 +253,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s do_ocr=do_ocr, num_col_upper=num_col_upper, num_col_lower=num_col_lower, + skip_layout_and_reading_order=skip_layout_and_reading_order, ) if dir_in: eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index cb70107..0619ef0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -180,6 +180,7 @@ def __init__( do_ocr=False, num_col_upper=None, num_col_lower=None, + skip_layout_and_reading_order = False, override_dpi=None, logger=None, pcgts=None, @@ -213,6 +214,7 @@ def __init__( self.allow_scaling = allow_scaling self.headers_off = headers_off self.ignore_page_extraction = ignore_page_extraction + self.skip_layout_and_reading_order = skip_layout_and_reading_order self.ocr = do_ocr if num_col_upper: self.num_col_upper = int(num_col_upper) @@ -1951,7 +1953,7 @@ def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_t q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) - def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_ro=False): + def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False @@ -2019,7 +2021,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - if not skip_layout_ro: + if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) #print(img_resized.shape, num_col_classifier, "num_col_classifier") @@ -3818,6 +3820,30 @@ def return_textline_contour_with_added_box_coordinate(self, textline_contour, b def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] + def return_it_in_two_groups(self,x_differential): + split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 for ind in range(len(x_differential)-1)] + + split_masked = list( np.array(split[:])[np.array(split[:])!=-1] ) + + if 0 not in split_masked: + split_masked.insert(0, -1) + + split_masked.append(len(x_differential)-1) + + split_masked = np.array(split_masked) +1 + + sums = 
[np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) for ind in range(len(split_masked)-1)] + + indexes_to_bec_changed = [ind if ( np.abs(sums[ind-1]) > np.abs(sums[ind]) and np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 for ind in range(1,len(sums)-1) ] + + indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1] + + x_differential_new = np.copy(x_differential) + for i in indexes_to_bec_changed_filtered: + x_differential_new[split_masked[i]:split_masked[i+1]] = -1*np.array(x_differential)[split_masked[i]:split_masked[i+1]] + + return x_differential_new + def dilate_textlines(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] @@ -3863,6 +3889,8 @@ def dilate_textlines(self,all_found_textline_polygons): y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + y_differential = self.return_it_in_two_groups(y_differential) + y_differential = np.array(y_differential) @@ -3890,7 +3918,6 @@ def dilate_textlines(self,all_found_textline_polygons): else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) mult = y_biger_than_x*y_differential @@ -3918,8 +3945,10 @@ def dilate_textlines(self,all_found_textline_polygons): x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + x_differential = self.return_it_in_two_groups(x_differential) x_differential = np.array(x_differential) + con_scaled = con_ind*1 con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential @@ -3949,8 +3978,6 @@ def run(self): Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - - skip_layout_ro = True t0_tot = time.time() @@ -3968,7 +3995,7 @@ def run(self): self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if not skip_layout_ro: + if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) @@ -4356,7 +4383,7 @@ def run(self): return pcgts #print("text region early 7 in %.1fs", time.time() - t0) else: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_ro=skip_layout_ro) + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) From b9e8959c4aefb0b9d24efb99abc309d7d350163c Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Sep 2024 16:33:13 +0200 Subject: [PATCH 29/64] update of light versions --- qurator/eynollah/eynollah.py | 238 ++++++++++++++++++----------------- 1 file changed, 126 insertions(+), 112 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 0619ef0..c7407e2 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1862,7 +1862,10 @@ def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_t def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): 
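
The thresholding_for_artificial_class_in_light_version switch that this hunk ties to textline_light controls a post-processing step added to do_prediction in PATCH 25: instead of trusting the argmax alone, any pixel whose probability for the artificial separator class exceeds a low threshold is forced to that class, so the thin separators between touching lines survive even where another class wins the softmax. A compact sketch of that rule with the batch dimension dropped; the channel index 2 and the 0.2 threshold are read off the diff, while the function name is illustrative:

    import numpy as np

    def merge_artificial_class(label_p_pred, art_channel=2, threshold=0.2):
        # label_p_pred: (H, W, n_classes) softmax output of the textline model.
        seg = np.argmax(label_p_pred, axis=-1)
        # low-confidence override: the separator class wins wherever its
        # probability clears the threshold, regardless of the argmax
        seg[label_p_pred[..., art_channel] > threshold] = art_channel
        return seg
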
self.logger.debug('enter textline_contours') - thresholding_for_artificial_class_in_light_version = True#False + if self.textline_light: + thresholding_for_artificial_class_in_light_version = True#False + else: + thresholding_for_artificial_class_in_light_version = False if not self.dir_in: model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) #img = img.astype(np.uint8) @@ -2016,7 +2019,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) - textline_mask_tot_ea = self.run_textline(img_bin, num_col_classifier) + textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) @@ -2057,7 +2060,8 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=2) + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2097,6 +2101,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) @@ -3845,132 +3850,139 @@ def return_it_in_two_groups(self,x_differential): return x_differential_new def dilate_textlines(self,all_found_textline_polygons): - for i in range(len(all_found_textline_polygons[0])): - con_ind = all_found_textline_polygons[0][i] - - con_ind = con_ind.astype(np.float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - - if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + for j in range(len(all_found_textline_polygons)): + for i in range(len(all_found_textline_polygons[j])): + con_ind = all_found_textline_polygons[j][i] - x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + con_ind = con_ind.astype(np.float) - mult = x_biger_than_x*x_differential + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) - if y_differential[0]==0: - y_differential[0] = 0.1 + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + - if y_differential[-1]==0: - y_differential[-1]= 0.1 + if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + mult = x_biger_than_x*x_differential - y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. 
for ind in range(len(y_differential)) ] - - - if y_differential[0]==0.1: - y_differential[0] = y_differential[1] - if y_differential[-1]==0.1: - y_differential[-1] = y_differential[-2] + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) - y_differential.append(y_differential[0]) - - y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] - - y_differential = self.return_it_in_two_groups(y_differential) - - y_differential = np.array(y_differential) - - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 - con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 - - try: - con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 - con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 - con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 - - try: - con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 - con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 - except: - pass - - - else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - - mult = y_biger_than_x*y_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if x_differential[0]==0: - x_differential[0] = 0.1 - - if x_differential[-1]==0: - x_differential[-1]= 0.1 + if y_differential[0]==0: + y_differential[0] = 0.1 + if y_differential[-1]==0: + y_differential[-1]= 0.1 + + + + y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. for ind in range(len(y_differential)) ] - x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. 
for ind in range(len(x_differential)) ] - - - if x_differential[0]==0.1: - x_differential[0] = x_differential[1] - if x_differential[-1]==0.1: - x_differential[-1] = x_differential[-2] + if y_differential[0]==0.1: + y_differential[0] = y_differential[1] + if y_differential[-1]==0.1: + y_differential[-1] = y_differential[-2] + + y_differential.append(y_differential[0]) - x_differential.append(x_differential[0]) - - x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] - - x_differential = self.return_it_in_two_groups(x_differential) - x_differential = np.array(x_differential) - - - con_scaled = con_ind*1 - - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 - con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + + y_differential = self.return_it_in_two_groups(y_differential) + + y_differential = np.array(y_differential) + + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential + + con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 + con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 + + try: + con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 + con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 + con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 + + try: + con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 + con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 + except: + pass - con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 - con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 - con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 - con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + else: + y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) + + mult = y_biger_than_x*y_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if x_differential[0]==0: + x_differential[0] = 0.1 + + if x_differential[-1]==0: + x_differential[-1]= 0.1 + + + + x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. 
for ind in range(len(x_differential)) ] + + + if x_differential[0]==0.1: + x_differential[0] = x_differential[1] + if x_differential[-1]==0.1: + x_differential[-1] = x_differential[-2] + + x_differential.append(x_differential[0]) + + x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + + x_differential = self.return_it_in_two_groups(x_differential) + x_differential = np.array(x_differential) + + + con_scaled = con_ind*1 + + con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential + + con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 + con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + + try: + con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 + con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 + con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + + try: + con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 + con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 + except: + pass + - con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 - con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - all_found_textline_polygons[0][i][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[0][i][:,0,0] = con_scaled[:,0, 0] + all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons def run(self): @@ -4207,6 +4219,8 @@ def run(self): slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + else: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) From 5d680136a4ed752e398cd47d3be0fd5aaf698f13 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 21 Sep 2024 01:04:28 +0200 Subject: [PATCH 30/64] updating light version --- qurator/eynollah/eynollah.py | 45 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c7407e2..629818f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -260,7 +260,7 @@ def __init__( if self.textline_light: 
self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -1916,11 +1916,7 @@ def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) - - if self.textline_light: - return (prediction_textline[:, :, 0]==1)*1, (prediction_textline_longshot_true_size[:, :, 0]==1)*1 - else: - return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] + return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): @@ -1996,7 +1992,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout):# or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) @@ -4066,8 +4062,35 @@ def run(self): t1 = time.time() #plt.imshow(table_prediction) #plt.show() - + if self.light_version and num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + 
table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) + self.logger.info("detection of marginals took %.1fs", time.time() - t1) #print("text region early 2 marginal in %.1fs", time.time() - t0) t1 = time.time() @@ -4222,18 +4245,20 @@ def run(self): all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) #print("text region early 6 in %.1fs", time.time() - t0) 
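# NOTE (editorial, not part of the diff): the hunk above adds a one-iteration KERNEL
# erosion of textline_mask_tot_ea ahead of both straight-line slope/deskew branches
# and raises the erosion inside the two get_slopes_and_deskew_new_curved calls from
# one to two iterations, presumably to pull apart textlines that the lighter mask
# left touching, at the cost of slightly thinner textline polygons.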
if self.full_layout: From 7f08458436d1f6aad43f809b3a388c8c275d44f7 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 21 Sep 2024 14:39:54 +0200 Subject: [PATCH 31/64] dilation of text regions without opencv --- qurator/eynollah/eynollah.py | 84 +++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 629818f..b2dea47 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -37,9 +37,7 @@ sys.stderr = stderr tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") -from scipy.signal import find_peaks import matplotlib.pyplot as plt -from scipy.ndimage import gaussian_filter1d from tensorflow.python.keras.backend import set_session from tensorflow.keras import layers @@ -2056,8 +2054,8 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay mask_texts_only = mask_texts_only.astype('uint8') - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2097,6 +2095,8 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -3845,6 +3845,79 @@ def return_it_in_two_groups(self,x_differential): return x_differential_new + def dilate_textregions_contours(self,all_found_textline_polygons): + for j in range(len(all_found_textline_polygons)): + + con_ind = all_found_textline_polygons[j] + + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + x_differential = gaussian_filter1d(x_differential, 3) + y_differential = gaussian_filter1d(y_differential, 3) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + + ###inc_x =list(inc_x) + ###inc_x.append(inc_x[0]) + + 
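                # NOTE (editorial, not part of the diff): the loop above shifts every
                # vertex along its edge normal (-sign(dy), sign(dx)) by a fixed amount:
                # 12 px where one axis clearly dominates and 7 px on diagonal edges.
                # PATCH 35 later derives both magnitudes from the region's area. The
                # wrap-around a few lines below (inc_x[0] = inc_x[-1]) gives the first
                # vertex, which has no preceding edge, the shift of the closing edge.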
###inc_y =list(inc_y) + ###inc_y.append(inc_y[0]) + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons + + + + + + + + def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): @@ -4096,7 +4169,7 @@ def run(self): t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - + polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.full_layout: if not self.light_version: img_bin_light = None @@ -4230,6 +4303,7 @@ def run(self): #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + txt_con_org = self.dilate_textregions_contours(txt_con_org) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) From 62f8ae486043ddf9e39b057e754cc28081275ce3 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 23 Sep 2024 14:03:07 +0200 Subject: [PATCH 32/64] updating dilation of textlines and text regions --- qurator/eynollah/eynollah.py | 96 +++++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b2dea47..fb2d699 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3855,6 +3855,7 @@ def dilate_textregions_contours(self,all_found_textline_polygons): x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) + x_differential = gaussian_filter1d(x_differential, 3) y_differential = gaussian_filter1d(y_differential, 3) @@ -3912,6 +3913,93 @@ def dilate_textregions_contours(self,all_found_textline_polygons): return all_found_textline_polygons + def dilate_textline_contours(self,all_found_textline_polygons): + for j in range(len(all_found_textline_polygons)): + for ij in range(len(all_found_textline_polygons[j])): + + con_ind = all_found_textline_polygons[j][ij] + + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + x_differential = gaussian_filter1d(x_differential, 3) + y_differential = gaussian_filter1d(y_differential, 3) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + 
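                    # NOTE (editorial, not part of the diff): the threshold ladder that
                    # follows picks the offset magnitude dilation_m1 per textline from
                    # its orientation and elongation; PATCH 33 below tightens these
                    # ratio bands and clamps the result, and PATCH 34 replaces the
                    # ladder with an area-based estimate of line thickness.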
+ + #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) + ##if (y_max-y_min)<40: + ##dilation_m1 = 5 + ##dilation_m2 = int(dilation_m1/2.) +1 + ##else: + ##dilation_m1 = 12 + ##dilation_m2 = int(dilation_m1/2.) +1 + + if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 1/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + else: + dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + dilation_m2 = int(dilation_m1/2.) +1 + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + ###inc_x =list(inc_x) + ###inc_x.append(inc_x[0]) + + ###inc_y =list(inc_y) + ###inc_y.append(inc_y[0]) + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons @@ -4174,6 +4262,7 @@ def run(self): if not self.light_version: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -4304,6 +4393,7 @@ def run(self): if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) txt_con_org = self.dilate_textregions_contours(txt_con_org) + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) @@ -4316,7 +4406,9 @@ def 
run(self): slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) @@ -4508,7 +4600,7 @@ def run(self): all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) order_text_new = [0] From 6626dc68660d239cf8a4a15b64e8bb670e395409 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 23 Sep 2024 15:50:37 +0200 Subject: [PATCH 33/64] updating textline dilation parameters --- qurator/eynollah/eynollah.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index fb2d699..a69854d 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3950,16 +3950,26 @@ def dilate_textline_contours(self,all_found_textline_polygons): ##dilation_m1 = 12 ##dilation_m2 = int(dilation_m1/2.) +1 - if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 2/20.0 ) elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 2/20.0 ) elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: dilation_m1 = int( (x_max-x_min) * 1/20.0 ) else: dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + + if dilation_m1>12: + dilation_m1 = 12 + if dilation_m1<4: + dilation_m1 = 4 + #print(dilation_m1, 'dilation_m1') dilation_m2 = int(dilation_m1/2.) 
+1 for i in range(len(x_differential)):
From b33739adeef5cd40b48faa3a955cd1d473b5e250 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 24 Sep 2024 16:06:27 +0200 Subject: [PATCH 34/64] parametrization of textline contour dilation is accomplished --- qurator/eynollah/eynollah.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index a69854d..8c0979d 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3919,6 +3919,8 @@ def dilate_textline_contours(self,all_found_textline_polygons): con_ind = all_found_textline_polygons[j][ij] + area = cv2.contourArea(con_ind) + con_ind = con_ind.astype(np.float) x_differential = np.diff( con_ind[:,0,0]) @@ -3943,6 +3945,7 @@ def dilate_textline_contours(self,all_found_textline_polygons): #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) + #print(area / (x_max-x_min)) ##if (y_max-y_min)<40: ##dilation_m1 = 5 ##dilation_m2 = int(dilation_m1/2.) +1 @@ -3950,20 +3953,26 @@ def dilate_textline_contours(self,all_found_textline_polygons): ##dilation_m1 = 12 ##dilation_m2 = int(dilation_m1/2.) +1 - if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 5/20.0 ) - elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 2/20.0 ) - elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 5/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 2/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + #########if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 2/20.0 ) + #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 1/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 2/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + #########else: + #########dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.35) else: - dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + dilation_m1 = round(area / (y_max-y_min) * 0.35) + if dilation_m1>12: dilation_m1 = 12
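A note on the sizing rule that PATCH 34 settles on: area divided by the longer bounding-box side approximates the mean thickness of the textline polygon, so the dilation step now scales with line height instead of stepping through the aspect-ratio ladder above. A minimal standalone sketch of the rule (hypothetical helper name, not from the patch; expects an OpenCV contour of shape (N, 1, 2) and assumes the lower clamp of 4 from PATCH 33 is kept):

    import cv2
    import numpy as np

    def textline_dilation_steps(con_ind):
        # area / longer bounding-box side ~ average thickness of the polygon
        area = cv2.contourArea(con_ind)
        w = float(np.max(con_ind[:, 0, 0]) - np.min(con_ind[:, 0, 0]))
        h = float(np.max(con_ind[:, 0, 1]) - np.min(con_ind[:, 0, 1]))
        dilation_m1 = round(area / (w if h <= w else h) * 0.35)
        dilation_m1 = min(max(dilation_m1, 4), 12)  # clamp to a sane pixel range
        dilation_m2 = int(dilation_m1 / 2.) + 1     # softer step for diagonal edges
        return dilation_m1, dilation_m2

For a 400 x 20 px textline of roughly 7000 px² this yields dilation_m1 = 6 and dilation_m2 = 4, i.e. the offset stays proportional to stroke height.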
From 95effe54a0159811b80c7ca5bd9147d196ef5187 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 25 Sep 2024 20:00:53 +0200 Subject: [PATCH 35/64] updating textregions dilation --- qurator/eynollah/eynollah.py | 151 ++++++++++++++++++++++++++++++++--- 1 file changed, 139 insertions(+), 12 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8c0979d..794ebe6 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2054,7 +2054,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -3846,18 +3846,22 @@ def return_it_in_two_groups(self,x_differential): return x_differential_new def dilate_textregions_contours(self,all_found_textline_polygons): + #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): con_ind = all_found_textline_polygons[j] - + area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) + con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.1) + con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.1) + x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, 3) - y_differential = gaussian_filter1d(y_differential, 3) + x_differential = gaussian_filter1d(x_differential, .5) + y_differential = gaussian_filter1d(y_differential, .5) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) @@ -3873,23 +3877,54 @@ def dilate_textregions_contours(self,all_found_textline_polygons): inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.12) + else: + dilation_m1 = round(area / (y_max-y_min) * 0.12) + + if dilation_m1>8: + dilation_m1 = 8 + if dilation_m1<5: + dilation_m1 = 5 + #print(dilation_m1, 'dilation_m1') + dilation_m2 = int(dilation_m1/2.)
+1 + for i in range(len(x_differential)): if abs_diff[i]==0: - inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and abs_diff[i]>=3: if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) else: - inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) else: - inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + ###for i in range(len(x_differential)): + ###if abs_diff[i]==0: + ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + + ###elif abs_diff[i]!=0 and abs_diff[i]>=3: + ###if abs(x_differential[i])>abs(y_differential[i]): + ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + ###else: + ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + ###else: + ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) ###inc_x =list(inc_x) ###inc_x.append(inc_x[0]) @@ -3908,6 +3943,98 @@ def dilate_textregions_contours(self,all_found_textline_polygons): con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + #print(results,'results') + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + #print(area_scaled / area, "ratio") + #print(results,'results') + #if results[0]==1 and diff_result[-1]==-2: + ##indices_2 = indices_2[1:] + ##indices_m2 = indices_m2[1:] + + #con_scaled[:indices_m2[0]+1,0, 1] = con_scaled[indices_m2[-1],0, 1] + #con_scaled[:indices_m2[0]+1,0, 0] = con_scaled[indices_m2[-1],0, 0] + + + #con_scaled[indices_2[-1]+1:,0, 1] = con_scaled[indices_m2[-1],0, 1] + #con_scaled[indices_2[-1]+1:,0, 0] = con_scaled[indices_m2[-1],0, 0] + + #indices_2 = indices_2[:-1] + #indices_m2 = indices_m2[1:-1] + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = 
con_ind[:indices_m2[0]+1,0,0] + #indices_2 = indices_2[1:] + indices_m2 = indices_m2[1:] + + + + if len(indices_2)>len(indices_m2): + con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] + con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + + indices_2 = indices_2[:-1] + + + + #diff_neg_pos = np.array(indices_m2) - np.array(indices_2) + + + #print(diff_neg_pos,'diff') + ##print(indices_2, 'indices_2') + #indices_2 = np.array(indices_2)[diff_neg_pos>1] + #indices_m2 = np.array(indices_m2)[diff_neg_pos>1] + + for ii in range(len(indices_2)): + + #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0] + #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1] + + #if x_inner[-1]>=x_inner[0]: + #x_interest = np.min(x_inner) + #else: + #x_interest = np.max(x_inner) + + #if y_inner[-1]>=y_inner[0]: + #y_interest = np.min(y_inner) + #else: + #y_interest = np.max(y_inner) + + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + + + + #con_scaled[:,0, 1][results[:]>0] = con_ind[:,0,1][results[:]>0] + #con_scaled[:,0, 0][results[:]>0] = con_ind[:,0,0][results[:]>0] + + #print(list(results), 'results') + #print(list(diff_result), 'diff_result') + #print(indices_2,'2') + #print(indices_m2,'-2') + #print(diff_neg_pos,'diff_neg_pos') + + #con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) + #con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) + + con_scaled[-1,0, 1] = con_scaled[0,0, 1] + con_scaled[-1,0, 0] = con_scaled[0,0, 0] all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons
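The most consequential part of PATCH 35 is the repair pass above: pushing vertices outward along edge normals can fold the contour back into itself at concavities, so every dilated vertex is re-classified against the original contour with cv2.pointPolygonTest (positive inside, negative outside, zero on the edge, with zero treated as inside). Jumps of +2 in that classification mark a run of vertices that dived back inside, and each run up to the matching -2 jump is frozen at the last vertex that stayed outside. A condensed sketch of the step under those conventions (hypothetical helper name, illustrative rather than a drop-in replacement):

    import cv2
    import numpy as np

    def snap_inward_folds(con_ind, con_scaled):
        # classify every dilated vertex against the ORIGINAL polygon
        results = np.array([cv2.pointPolygonTest(con_ind.astype(np.int32),
                                                 (float(con_scaled[i, 0, 0]),
                                                  float(con_scaled[i, 0, 1])), False)
                            for i in range(len(con_scaled))])
        results[results == 0] = 1                            # on-edge counts as inside
        diff = np.diff(results)
        enters = [i for i, d in enumerate(diff) if d == 2]   # outside -> inside
        leaves = [i for i, d in enumerate(diff) if d == -2]  # inside -> outside
        if results[0] == 1 and leaves:                       # starts inside: keep originals
            con_scaled[:leaves[0] + 1] = con_ind[:leaves[0] + 1]
            leaves = leaves[1:]
        if len(enters) > len(leaves):                        # trailing run never exits
            con_scaled[enters[-1] + 1:] = con_ind[enters[-1] + 1:]
            enters = enters[:-1]
        for a, b in zip(enters, leaves):                     # freeze each inside run
            con_scaled[a + 1:b + 1] = con_scaled[a]
        return con_scaled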
From 133091137dc01f04eedf153119a04559a8f0633d Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 27 Sep 2024 13:57:01 +0200 Subject: [PATCH 36/64] dilation of textregions and marginals is accomplished --- qurator/eynollah/eynollah.py | 454 ++++++++++++++++++++++++----------- 1 file changed, 314 insertions(+), 140 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 794ebe6..2fe7325 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ def __init__( self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -1050,7 +1050,7 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa #del model #gc.collect() return prediction_true - def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_percent=0.1): + def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -1064,14 +1064,14 @@ def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_perce label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) - seg_not_base = label_p_pred[0,:,:,4] + #seg_not_base = label_p_pred[0,:,:,4] - seg_not_base[seg_not_base>0.4] =1 - seg_not_base[seg_not_base<1] =0 + #seg_not_base[seg_not_base>0.4] =1 + #seg_not_base[seg_not_base<1] =0 seg = np.argmax(label_p_pred, axis=3)[0] - seg[seg_not_base==1]=4 + #seg[seg_not_base==1]=4 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) @@ -1099,6 +1099,16 @@ def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_perce nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): @@ -1120,44 +1130,57 @@ def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_perce if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model + + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + #img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + #verbose=0) + #seg = np.argmax(label_p_pred, axis=3)[0] - seg_not_base = label_p_pred[0,:,:,4] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_not_base = label_p_pred[0,:,:,4] + ########seg2 = -label_p_pred[0,:,:,2] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 + ######seg_not_base[seg_not_base>0.03] =1 + ######seg_not_base[seg_not_base<1] =0 - seg_test = label_p_pred[0,:,:,1] - ##seg2 = -label_p_pred[0,:,:,2]
model, marginal_of_patch_perce ##plt.show() #seg[seg==1]=0 #seg[seg_test==1]=1 - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - - if i == 0 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i != 0 and i != nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, 
:] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + ######seg[seg_not_base==1]=4 + ######seg[seg_background==1]=0 + ######seg[(seg_line==1) & (seg==0)]=3 + #seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + + #if i == 0 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i == 0 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i == 0 and j != 0 and j != nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j != 0 and j != nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i != 0 and i != nxf - 1 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + #elif i != 0 and i != nxf - 1 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] + #mask_true[index_y_d + margin : 
index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color + #else: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + + + if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin 
: index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif 
i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) prediction_true = prediction_true.astype(np.uint8) return prediction_true @@ -1963,7 +2151,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 800#1000 + img_w_new = 1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -1971,17 +2159,17 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 3: - img_w_new = 1600#2000 + img_w_new = 2000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 4: - img_w_new = 1900#2500 + img_w_new = 2500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 5: - img_w_new = 2300#3000 + img_w_new = 3000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: - img_w_new = 3000#4000 + img_w_new = 4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) @@ -2025,17 +2213,17 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region, n_batch_inference=1) else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light, n_batch_inference=3) prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2, 
n_batch_inference=1) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) @@ -2054,8 +2242,12 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay mask_texts_only = mask_texts_only.astype('uint8') - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + ##if num_col_classifier == 1 or num_col_classifier == 2: + ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + + mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) + mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -3150,7 +3342,14 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p pixel_img = 4 min_area_mar = 0.00001 - polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) + if self.light_version: + marginal_mask = (text_regions_p[:,:]==pixel_img)*1 + marginal_mask = marginal_mask.astype('uint8') + marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) + + polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) + else: + polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) @@ -3241,7 +3440,15 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s pixel_img = 4 min_area_mar = 0.00001 - polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) + + if self.light_version: + marginal_mask = (text_regions_p[:,:]==pixel_img)*1 + marginal_mask = marginal_mask.astype('uint8') + marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) + + polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) + else: + polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) @@ -3850,18 +4057,19 @@ def dilate_textregions_contours(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): con_ind = all_found_textline_polygons[j] + #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) - con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.1) - con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.1) + #con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.5) + #con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.5) x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, .5) - y_differential = gaussian_filter1d(y_differential, .5) + x_differential = gaussian_filter1d(x_differential, 0.1) + y_differential = gaussian_filter1d(y_differential, 0.1) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) @@ -3885,8 +4093,8 @@ def 
dilate_textregions_contours(self,all_found_textline_polygons): if dilation_m1>8: dilation_m1 = 8 - if dilation_m1<5: - dilation_m1 = 5 + if dilation_m1<6: + dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') dilation_m2 = int(dilation_m1/2.) +1 @@ -4002,7 +4210,6 @@ def dilate_textregions_contours(self,all_found_textline_polygons): #indices_m2 = np.array(indices_m2)[diff_neg_pos>1] for ii in range(len(indices_2)): - #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0] #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1] @@ -4030,11 +4237,12 @@ def dilate_textregions_contours(self,all_found_textline_polygons): #print(indices_m2,'-2') #print(diff_neg_pos,'diff_neg_pos') - #con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) - #con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) + ##con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) + ##con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) - con_scaled[-1,0, 1] = con_scaled[0,0, 1] - con_scaled[-1,0, 0] = con_scaled[0,0, 0] + #con_scaled[-1,0, 1] = con_scaled[0,0, 1] + #con_scaled[-1,0, 0] = con_scaled[0,0, 0] + ##print(len(con_scaled[:,0,0]),'con_scaled[:,0,0]') all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons @@ -4045,7 +4253,7 @@ def dilate_textline_contours(self,all_found_textline_polygons): for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] - + print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) @@ -4069,31 +4277,6 @@ def dilate_textline_contours(self,all_found_textline_polygons): inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) - - - #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) - #print(area / (x_max-x_min)) - ##if (y_max-y_min)<40: - ##dilation_m1 = 5 - ##dilation_m2 = int(dilation_m1/2.) +1 - ##else: - ##dilation_m1 = 12 - ##dilation_m2 = int(dilation_m1/2.) 
+1 - - #########if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 5/20.0 ) - #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 2/20.0 ) - #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 5/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 2/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 1/20.0 ) - #########else: - #########dilation_m1 = int( (y_max-y_min) * 4/20.0 ) if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.35) @@ -4126,11 +4309,6 @@ def dilate_textline_contours(self,all_found_textline_polygons): inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - ###inc_x =list(inc_x) - ###inc_x.append(inc_x[0]) - - ###inc_y =list(inc_y) - ###inc_y.append(inc_y[0]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -4146,11 +4324,6 @@ def dilate_textline_contours(self,all_found_textline_polygons): all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons - - - - - def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): @@ -4403,12 +4576,12 @@ def run(self): t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.full_layout: if not self.light_version: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -4537,9 +4710,10 @@ def run(self): #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - txt_con_org = self.dilate_textregions_contours(txt_con_org) contours_only_text_parent = 
self.dilate_textregions_contours(contours_only_text_parent) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) From ad323162173f651e9c5f2cb28804c23a582432d5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 27 Sep 2024 20:59:01 +0200 Subject: [PATCH 37/64] updating light version --- qurator/eynollah/eynollah.py | 46 +++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2fe7325..72a72d9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ def __init__( self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -541,6 +541,7 @@ def resize_image_with_column_classifier(self, is_image_enhanced, img_bin): img = self.imread() _, page_coord = self.early_page_for_num_of_column_classification(img) + if not self.dir_in: model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) if self.input_binary: @@ -611,6 +612,10 @@ def resize_and_enhance_image_with_column_classifier(self,light_version): width_early = img.shape[1] t1 = time.time() _, page_coord = self.early_page_for_num_of_column_classification(img_bin) + + self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :] + self.page_coord = page_coord + if not self.dir_in: model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) @@ -737,7 +742,7 @@ def get_image_and_scales(self, img_org, img_res, scale): def get_image_and_scales_after_enhancing(self, img_org, img_res): self.logger.debug("enter get_image_and_scales_after_enhancing") self.image = np.copy(img_res) - self.image = self.image.astype(np.uint8) + #self.image = self.image.astype(np.uint8) self.image_org = np.copy(img_org) self.height_org = self.image_org.shape[0] self.width_org = self.image_org.shape[1] @@ -1059,19 +1064,18 @@ def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, ma if not patches: img_h_page = img.shape[0] img_w_page = img.shape[1] - img = img / float(255.0) + img = img / 255.0 img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img.reshape(1, 
img.shape[0], img.shape[1], img.shape[2]), verbose=0) - - #seg_not_base = label_p_pred[0,:,:,4] - - #seg_not_base[seg_not_base>0.4] =1 - #seg_not_base[seg_not_base<1] =0 - seg = np.argmax(label_p_pred, axis=3)[0] - #seg[seg_not_base==1]=4 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[0,:,:,4] + seg_art[seg_art<0.1] =0 + seg_art[seg_art>0] =1 + seg[seg_art==1]=4 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) @@ -2151,7 +2155,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 1000 + img_w_new = 800 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -2206,29 +2210,39 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) + + #print(self.image_org.shape) + + #plt.imshwo(self.image_page_org_size) + #plt.show() if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) - #print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region, n_batch_inference=1) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = False) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light, n_batch_inference=3) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2, n_batch_inference=1) + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=False) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) + #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() + prediction_regions_org = 
resize_image(prediction_regions_org,img_height_h, img_width_h ) @@ -3195,7 +3209,7 @@ def run_enhancement(self,light_version): scale = 1 if is_image_enhanced: if self.allow_enhancement: - img_res = img_res.astype(np.uint8) + #img_res = img_res.astype(np.uint8) self.get_image_and_scales(img_org, img_res, scale) if self.plotter: self.plotter.save_enhanced_image(img_res) From 1774076f4a9536ae68d9ab0a982bb84f65c8d858 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 30 Sep 2024 16:10:29 +0200 Subject: [PATCH 38/64] updating light version. Remove textlines or textregion contours inside a bigger one --- qurator/eynollah/eynollah.py | 124 ++++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 10 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 72a72d9..cbc7b88 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ def __init__( self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylay12sp_0_2"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -1071,8 +1071,13 @@ def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, ma seg = np.argmax(label_p_pred, axis=3)[0] if thresholding_for_artificial_class_in_light_version: + #seg_text = label_p_pred[0,:,:,1] + #seg_text[seg_text<0.2] =0 + #seg_text[seg_text>0] =1 + #seg[seg_text==1]=1 + seg_art = label_p_pred[0,:,:,4] - seg_art[seg_art<0.1] =0 + seg_art[seg_art<0.2] =0 seg_art[seg_art>0] =1 seg[seg_art==1]=4 @@ -2159,7 +2164,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: - img_w_new = 1300#1500 + img_w_new = 1500#1500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 3: @@ -2222,7 +2227,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay if num_col_classifier == 1 or num_col_classifier == 2: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = False) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : 
self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) @@ -2232,7 +2237,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay else: if num_col_classifier == 1 or num_col_classifier == 2: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=False) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) @@ -2249,16 +2254,19 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay img_bin = resize_image(img_bin,img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_texts_only = mask_texts_only.astype('uint8') - ##if num_col_classifier == 1 or num_col_classifier == 2: - ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + #if num_col_classifier == 1 or num_col_classifier == 2: + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) @@ -4110,6 +4118,7 @@ def dilate_textregions_contours(self,all_found_textline_polygons): if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') + dilation_m1 = 5 dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): @@ -4267,7 +4276,6 @@ def dilate_textline_contours(self,all_found_textline_polygons): for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] - print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) @@ -4303,7 +4311,7 @@ def dilate_textline_contours(self,all_found_textline_polygons): if dilation_m1<4: dilation_m1 = 4 #print(dilation_m1, 'dilation_m1') - dilation_m2 = int(dilation_m1/2.) +1 + dilation_m2 = int(dilation_m1/2.) 
+1 for i in range(len(x_differential)): if abs_diff[i]==0: @@ -4339,6 +4347,100 @@ def dilate_textline_contours(self,all_found_textline_polygons): all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons + def filter_contours_inside_a_bigger_one(self,contours, image, marginal_cnts=None, type_contour="textregion"): + if type_contour=="textregion": + areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] + area_tot = image.shape[0]*image.shape[1] + + M_main = [cv2.moments(contours[j]) for j in range(len(contours))] + cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot + contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] + contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] + + #contours_> = [contours[ind] for ind in contours_index_big] + indexes_to_be_removed = [] + for ind_small in contours_index_small: + results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big ] + if marginal_cnts: + results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in range(len(marginal_cnts)) ] + results_marginal = np.array(results_marginal) + + if np.any(results_marginal==1): + indexes_to_be_removed.append(ind_small) + + results = np.array(results) + + if np.any(results==1): + indexes_to_be_removed.append(ind_small) + + + if len(indexes_to_be_removed)>0: + indexes_to_be_removed = np.unique(indexes_to_be_removed) + for ind in indexes_to_be_removed: + contours.pop(ind) + return contours + + + else: + contours_txtline_of_all_textregions = [] + + for jj in range(len(contours)): + contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] + + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] + cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + + areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] + area_tot_tot = image.shape[0]*image.shape[1] + + areas_ratio_tot = np.array(areas_tot)/ area_tot_tot + + contours_index_big_tot = [ind for ind in range(len(contours_txtline_of_all_textregions)) if areas_ratio_tot[ind] >= 1e-2] + + + for jj in range(len(contours)): + contours_in = contours[jj] + #print(len(contours_in)) + areas = [cv2.contourArea(con_ind) for con_ind in contours_in] + area_tot = image.shape[0]*image.shape[1] + + M_main = [cv2.moments(contours_in[j]) for j in range(len(contours_in))] + cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot + + if len(areas_ratio)>=1: + #print(np.max(areas_ratio), np.min(areas_ratio)) + contours_index_small = [ind for ind in range(len(contours_in)) if areas_ratio[ind] < 1e-2] + #contours_index_big = [ind for ind in range(len(contours_in)) if areas_ratio[ind] >= 1e-3] + + if len(contours_index_small)>0: + indexes_to_be_removed = [] + for ind_small in contours_index_small: + results = 
[cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big_tot ] + + results = np.array(results) + + if np.any(results==1): + indexes_to_be_removed.append(ind_small) + + + if len(indexes_to_be_removed)>0: + indexes_to_be_removed = np.unique(indexes_to_be_removed) + + for ind in indexes_to_be_removed: + contours[jj].pop(ind) + + return contours + + + + def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): @@ -4725,6 +4827,7 @@ def run(self): #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) @@ -4742,6 +4845,7 @@ def run(self): #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) else: From ab63d5ba408a3dfe42ee897b5e6976d4fc501bdd Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 30 Sep 2024 21:28:39 +0200 Subject: [PATCH 39/64] updating light version features --- qurator/eynollah/eynollah.py | 105 +++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 42 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index cbc7b88..61289fa 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2225,10 +2225,13 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + else: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = 
self.start_new_session_and_model(self.model_region_dir_p_ens_light) prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) @@ -2236,9 +2239,12 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + else: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) @@ -4356,6 +4362,8 @@ def filter_contours_inside_a_bigger_one(self,contours, image, marginal_cnts=None cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] @@ -4379,64 +4387,75 @@ def filter_contours_inside_a_bigger_one(self,contours, image, marginal_cnts=None if len(indexes_to_be_removed)>0: indexes_to_be_removed = np.unique(indexes_to_be_removed) + indexes_to_be_removed = np.sort(indexes_to_be_removed)[::-1] for ind in indexes_to_be_removed: contours.pop(ind) + return contours else: contours_txtline_of_all_textregions = [] + indexes_of_textline_tot = [] + index_textline_inside_textregion = [] for jj in range(len(contours)): contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] + ind_ins = np.zeros( len(contours[jj]) ) + jj + list_ind_ins = list(ind_ins) + + ind_textline_inside_tr = np.array (range(len(contours[jj])) ) + + list_ind_textline_inside_tr = list(ind_textline_inside_tr) + + index_textline_inside_textregion = index_textline_inside_textregion + list_ind_textline_inside_tr + + indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins + + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / 
(M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] area_tot_tot = image.shape[0]*image.shape[1] - areas_ratio_tot = np.array(areas_tot)/ area_tot_tot - - contours_index_big_tot = [ind for ind in range(len(contours_txtline_of_all_textregions)) if areas_ratio_tot[ind] >= 1e-2] - - - for jj in range(len(contours)): - contours_in = contours[jj] - #print(len(contours_in)) - areas = [cv2.contourArea(con_ind) for con_ind in contours_in] - area_tot = image.shape[0]*image.shape[1] + textregion_index_to_del = [] + textline_in_textregion_index_to_del = [] + for ij in range(len(contours_txtline_of_all_textregions)): - M_main = [cv2.moments(contours_in[j]) for j in range(len(contours_in))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) - areas_ratio = np.array(areas)/ area_tot + args_all.pop(ij) - if len(areas_ratio)>=1: - #print(np.max(areas_ratio), np.min(areas_ratio)) - contours_index_small = [ind for ind in range(len(contours_in)) if areas_ratio[ind] < 1e-2] - #contours_index_big = [ind for ind in range(len(contours_in)) if areas_ratio[ind] >= 1e-3] - - if len(contours_index_small)>0: - indexes_to_be_removed = [] - for ind_small in contours_index_small: - results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big_tot ] - - results = np.array(results) + areas_without = np.array(areas_tot)[args_all] + area_of_con_interest = areas_tot[ij] + + args_with_bigger_area = np.array(args_all)[areas_without > area_of_con_interest] + + if len(args_with_bigger_area)>0: + results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ] + results = np.array(results) + if np.any(results==1): + #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij]) + textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) + textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) + #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij])) - if np.any(results==1): - indexes_to_be_removed.append(ind_small) - - - if len(indexes_to_be_removed)>0: - indexes_to_be_removed = np.unique(indexes_to_be_removed) - - for ind in indexes_to_be_removed: - contours[jj].pop(ind) - - return contours + uniqe_args_trs = np.unique(textregion_index_to_del) + + for ind_u_a_trs in uniqe_args_trs: + textline_in_textregion_index_to_del_ind = np.array(textline_in_textregion_index_to_del)[np.array(textregion_index_to_del)==ind_u_a_trs] + textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] + + for ittrd in textline_in_textregion_index_to_del_ind: + contours[ind_u_a_trs].pop(ittrd) + + return contours + + @@ -4852,6 +4871,8 @@ def run(self): textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, 
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
+
+                #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
             else:
                 textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
                 slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)

From 543ed4bc38b94acf53f48a9224b97322cade0e5b Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 2 Oct 2024 14:09:13 +0200
Subject: [PATCH 40/64] -light requires -tll to be enabled, otherwise the
 process exits

---
 qurator/eynollah/cli.py      |  3 ++
 qurator/eynollah/eynollah.py | 63 +++++++++++++++++++++++++++++++++++-
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py
index b293403..4c762a8 100644
--- a/qurator/eynollah/cli.py
+++ b/qurator/eynollah/cli.py
@@ -227,6 +227,9 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s
     if textline_light and not light_version:
         print('Error: You used -tll to enable light textline detection but -light is not enabled')
         sys.exit(1)
+    if light_version and not textline_light:
+        print('Error: You used -light without -tll. The light version requires light textline detection (-tll) to be enabled.')
+        sys.exit(1)
     eynollah = Eynollah(
         image_filename=image,
         dir_out=out,
diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index 61289fa..6b8193c 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -252,7 +252,7 @@ def __init__(
         self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
         self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
         self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
-        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylay12sp_0_2"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
+        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlyla_12_0_2_con_18_22"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
         ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
         self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans"
         if self.textline_light:
@@ -1055,6 +1055,35 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa
         #del model
         #gc.collect()
         return prediction_true
+    def do_padding_with_scale(self,img, scale):
+        h_n = int(img.shape[0]*scale)
+        w_n = int(img.shape[1]*scale)
+
+        channel0_avg = int( np.mean(img[:,:,0]) )
+        channel1_avg = int( np.mean(img[:,:,1]) )
+        channel2_avg = int( np.mean(img[:,:,2]) )
+
+        h_diff = img.shape[0] - h_n
+        w_diff = img.shape[1] - w_n
+
+        h_start = int(h_diff / 2.)
+        w_start = int(w_diff / 2.)
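# [editor's note: the following comments are editorial, not part of this commit]
# do_padding_with_scale shrinks the image by `scale` and re-centres it on a
# canvas of the input's size filled with the per-channel mean colour; h_start
# and w_start above are the equal top and left borders. The remainder of the
# method (below) resizes the image to (h_n, w_n), fills a copy of the input
# with the channel averages, and pastes the resized image at (h_start, w_start),
# so callers get an output with the original shape. Illustrative call:
#
#     padded = self.do_padding_with_scale(img, 0.9)  # same shape as img, content at 90%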
+ + img_res = resize_image(img, h_n, w_n) + #label_res = resize_image(label, h_n, w_n) + + img_scaled_padded = np.copy(img) + + #label_scaled_padded = np.zeros(label.shape) + + img_scaled_padded[:,:,0] = channel0_avg + img_scaled_padded[:,:,1] = channel1_avg + img_scaled_padded[:,:,2] = channel2_avg + + img_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = img_res[:,:,:] + #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:] + + return img_scaled_padded#, label_scaled_padded def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") @@ -4349,6 +4378,38 @@ def dilate_textline_contours(self,all_found_textline_polygons): con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + indices_m2 = indices_m2[1:] + + + + if len(indices_2)>len(indices_m2): + con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] + con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + indices_2 = indices_2[:-1] + + + for ii in range(len(indices_2)): + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons From 1da4b7f589af94beea75157b80c0a7ecb6a213de Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 7 Oct 2024 10:55:10 +0200 Subject: [PATCH 41/64] updating light version --- qurator/eynollah/eynollah.py | 41 ++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 6b8193c..2c14ab9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ def __init__( self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlyla_12_0_2_con_18_22"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + 
"/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -2189,7 +2189,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 800 + img_w_new = 1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -2299,9 +2299,9 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay mask_texts_only = mask_texts_only.astype('uint8') - #if num_col_classifier == 1 or num_col_classifier == 2: - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + ##if num_col_classifier == 1 or num_col_classifier == 2: + ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) @@ -4153,7 +4153,7 @@ def dilate_textregions_contours(self,all_found_textline_polygons): if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') - dilation_m1 = 5 + dilation_m1 = 6 dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): @@ -4657,6 +4657,31 @@ def dilate_textlines(self,all_found_textline_polygons): all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons + + def delete_regions_without_textlines(self,slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con): + slopes_rem = [] + all_found_textline_polygons_rem = [] + boxes_text_rem = [] + txt_con_org_rem = [] + contours_only_text_parent_rem = [] + index_by_text_par_con_rem = [] + + for i, ind_con in enumerate(all_found_textline_polygons): + if len(ind_con): + all_found_textline_polygons_rem.append(ind_con) + slopes_rem.append(slopes[i]) + boxes_text_rem.append(boxes_text[i]) + txt_con_org_rem.append(txt_con_org[i]) + contours_only_text_parent_rem.append(contours_only_text_parent[i]) + index_by_text_par_con_rem.append(index_by_text_par_con[i]) + + index_sort = np.argsort(index_by_text_par_con_rem) + indexes_new = np.array(range(len(index_by_text_par_con_rem))) + + index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] for j in range(len(index_by_text_par_con_rem))] + + return slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort + def run(self): """ Get image and scales, then extract the page of scanned image @@ -4923,6 +4948,9 @@ def run(self): slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, 
txt_con_org, contours_only_text_parent, index_by_text_par_con)
+
+            #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
                 #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
                 all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
                 all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
@@ -5121,6 +5149,7 @@ def run(self):
                 all_found_textline_polygons=[ all_found_textline_polygons ]
                 all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
+                all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
                 order_text_new = [0]

From 3ef4eac24ca5d876243c62860ad9d4fa05110081 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Thu, 17 Oct 2024 19:12:28 +0200
Subject: [PATCH 42/64] textlines of text regions are extracted faster; early
 layout for all documents is done with the no-patches model on RGB input

---
 qurator/eynollah/eynollah.py        | 120 +++++++++++++++++++---------
 qurator/eynollah/utils/marginals.py |  65 ++-------------
 2 files changed, 89 insertions(+), 96 deletions(-)

diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index 2c14ab9..fd66b81 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -252,7 +252,7 @@ def __init__(
         self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
         self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
         self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
-        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
+        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
         ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
         self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans"
         if self.textline_light:
@@ -1710,6 +1710,36 @@ def extract_text_regions(self, img, patches, cols):
         self.logger.debug("exit extract_text_regions")
         return prediction_regions, prediction_regions2
+    def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
+
+        polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001)
+
+        M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))]
+        cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
+        cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
+
+        args_textlines = 
np.array(range(len(polygons_of_textlines))) + all_found_textline_polygons = [] + slopes = [] + all_box_coord =[] + + for index, con_region_ind in enumerate(contours_par): + results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) for ind in args_textlines ] + results = np.array(results) + + indexes_in = args_textlines[results==1] + + textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in] + + all_found_textline_polygons.append(textlines_ins) + slopes.append(0) + + _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) + + all_box_coord.append(crop_coor) + + return slopes, all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))) + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") if len(contours)>15: @@ -2099,14 +2129,14 @@ def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) @@ -2216,14 +2246,14 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - #print("inside bin ", time.time()-t_bin) + print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2236,7 +2266,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay else: img_bin = 
np.copy(img_resized) - #print("inside 1 ", time.time()-t_in) + print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) @@ -2246,14 +2276,15 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #print(self.image_org.shape) + #cv2.imwrite('out_13.png', self.image_page_org_size) #plt.imshwo(self.image_page_org_size) #plt.show() if not skip_layout_and_reading_order: - #print("inside 2 ", time.time()-t_in) + print("inside 2 ", time.time()-t_in) if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier == 2: + if num_col_classifier == 1 or num_col_classifier >= 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) @@ -2267,7 +2298,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - if num_col_classifier == 1 or num_col_classifier == 2: + if num_col_classifier == 1 or num_col_classifier >= 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) else: @@ -2278,7 +2309,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - #print("inside 3 ", time.time()-t_in) + print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2356,7 +2387,15 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - #print("inside 4 ", time.time()-t_in) + + #plt.imshow(textline_mask_tot_ea) + #plt.show() + + textline_mask_tot_ea[(text_regions_p_true==0) | (text_regions_p_true==4) ] = 0 + + #plt.imshow(textline_mask_tot_ea) + #plt.show() + print("inside 4 ", time.time()-t_in) return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) @@ -3308,7 +3347,7 @@ def run_marginals(self, image_page, textline_mask_tot_ea, mask_images, mask_line if self.tables: regions_without_separators[table_prediction==1] = 1 regions_without_separators = regions_without_separators.astype(np.uint8) - text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, kernel=KERNEL) + text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL) except Exception as e: 
self.logger.error("exception %s", e) @@ -3319,6 +3358,7 @@ def run_marginals(self, image_page, textline_mask_tot_ea, mask_images, mask_line def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts): self.logger.debug('enter run_boxes_no_full_layout') + t_0_box = time.time() if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) @@ -3328,6 +3368,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + print(time.time()-t_0_box,'time box in 1') if self.tables: regions_without_separators[table_prediction ==1 ] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3340,7 +3381,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - + print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) if num_col_classifier >= 3: @@ -3350,6 +3391,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) @@ -3378,7 +3420,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - + print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: @@ -3410,7 +3452,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables @@ -3751,8 +3793,10 @@ def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - - model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + if 
self.dir_in: + pass + else: + self.model_reading_order_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) height1 =672#448 width1 = 448#224 @@ -3793,7 +3837,7 @@ def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text img3 = img3.astype(np.uint16) - inference_bs = 4 + inference_bs = 3 input_1= np.zeros( (inference_bs, height1, width1,3)) starting_list_of_regions = [] starting_list_of_regions.append( list(range(labels_con.shape[2])) ) @@ -3835,7 +3879,7 @@ def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text batch_counter = batch_counter+1 if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr=model_ro_machine.predict(input_1 , verbose=0) + y_pr=self.model_reading_order_machine.predict(input_1 , verbose=0) if batch_counter==inference_bs: iteration_batches = inference_bs @@ -4698,16 +4742,16 @@ def run(self): t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) - #print("text region early -11 in %.1fs", time.time() - t0) + print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region early -1 in %.1fs", time.time() - t0) + print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) + print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: @@ -4720,17 +4764,17 @@ def run(self): textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) + print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) + print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) + print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4751,7 +4795,7 @@ def run(self): continue else: return pcgts - #print("text region early in 
%.1fs", time.time() - t0) + print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -4793,7 +4837,8 @@ def run(self): image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) + print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) @@ -4807,7 +4852,7 @@ def run(self): if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - #print("text region early 2 in %.1fs", time.time() - t0) + print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -4929,7 +4974,7 @@ def run(self): else: pass - #print("text region early 3 in %.1fs", time.time() - t0) + print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) @@ -4938,14 +4983,17 @@ def run(self): #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) + print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) + print("text region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir if not self.curved_line: if self.light_version: if self.textline_light: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, 
slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) @@ -4974,7 +5022,7 @@ def run(self): all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) + print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) @@ -5134,7 +5182,7 @@ def run(self): self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) + print("text region early 7 in %.1fs", time.time() - t0) else: _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) diff --git a/qurator/eynollah/utils/marginals.py b/qurator/eynollah/utils/marginals.py index 7c43de6..984156f 100644 --- a/qurator/eynollah/utils/marginals.py +++ b/qurator/eynollah/utils/marginals.py @@ -8,7 +8,7 @@ from .resize import resize_image from .rotate import rotate_image -def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None): +def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_version=False, kernel=None): mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1])) mask_marginals=mask_marginals.astype(np.uint8) @@ -49,27 +49,14 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N if thickness_along_y_percent>=14: text_with_lines_y_rev=-1*text_with_lines_y[:] - #print(text_with_lines_y) - #print(text_with_lines_y_rev) - - - - - #plt.plot(text_with_lines_y) - #plt.show() - text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev) - #plt.plot(text_with_lines_y_rev) - #plt.show() sigma_gaus=1 region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus) region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) - #plt.plot(region_sum_0_rev) - #plt.show() region_sum_0_updown=region_sum_0[len(region_sum_0)::-1] first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None)) @@ -78,43 +65,17 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N last_nonzero=len(region_sum_0)-last_nonzero - ##img_sum_0_smooth_rev=-region_sum_0 - - mid_point=(last_nonzero+first_nonzero)/2. 
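            # The margin search in this branch works on a 1-D projection profile:
            # the text mask is summed into a per-column ink profile, smoothed with
            # a Gaussian, and low-ink valleys on either side of mid_point become
            # margin candidates. A minimal stand-alone sketch of the idea (the
            # names page_mask and margin_candidates are illustrative, not part of
            # this function):
            #
            #   import numpy as np
            #   from scipy.ndimage import gaussian_filter1d
            #   from scipy.signal import find_peaks
            #
            #   def margin_candidates(page_mask):
            #       profile = gaussian_filter1d(page_mask.sum(axis=0), sigma=1)
            #       valleys, _ = find_peaks(-profile)   # columns with little ink
            #       mid = (valleys.min() + valleys.max()) / 2. if len(valleys) else 0
            #       return valleys[valleys < mid], valleys[valleys > mid]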
one_third_right=(last_nonzero-mid_point)/3.0 one_third_left=(mid_point-first_nonzero)/3.0 - #img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev) - - - - peaks, _ = find_peaks(text_with_lines_y_rev, height=0) - - peaks=np.array(peaks) - - - #print(region_sum_0[peaks]) - ##plt.plot(region_sum_0) - ##plt.plot(peaks,region_sum_0[peaks],'*') - ##plt.show() - #print(first_nonzero,last_nonzero,peaks) peaks=peaks[(peaks>first_nonzero) & ((peaksmid_point] @@ -137,9 +98,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N - - #print(point_left,point_right) - #print(text_regions.shape) if point_right>=mask_marginals.shape[1]: point_right=mask_marginals.shape[1]-1 @@ -148,10 +106,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N except: mask_marginals[:,:]=1 - #print(mask_marginals.shape,point_left,point_right,'nadosh') mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew) - #print(mask_marginals_rotated.shape,'nadosh') mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0) mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1 @@ -168,11 +124,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N max_point_of_right_marginal=text_regions.shape[1]-1 - #print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew') - #print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated') - #plt.imshow(mask_marginals) - #plt.show() - #plt.imshow(mask_marginals_rotated) #plt.show() @@ -195,10 +146,9 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N x_min_marginals_right=[] for i in range(len(cx_text_only)): - x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) - #print(x_width_mar,y_height_mar,y_height_mar/x_width_mar,'y_height_mar') + if x_width_mar>16 and y_height_mar/x_width_mar<18: marginlas_should_be_main_text.append(polygons_of_marginals[i]) if x_min_text_only[i]<(mid_point-one_third_left): @@ -220,18 +170,13 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N x_min_marginals_right=[text_regions.shape[1]-1] - - - #print(x_min_marginals_left[0],x_min_marginals_right[0],'margo') - - #print(marginlas_should_be_main_text,'marginlas_should_be_main_text') text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) - #print(np.unique(text_regions)) #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - + + text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 From f93fa12441104324ee8e7ced0488b44827704de3 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 18 Oct 2024 09:14:42 +0200 Subject: [PATCH 43/64] doing more multiprocessing in order to make the process faster --- qurator/eynollah/eynollah.py | 92 +++--- qurator/eynollah/utils/__init__.py | 93 +----- qurator/eynollah/utils/contour.py | 73 ++++- qurator/eynollah/utils/separate_lines.py | 386 +++++++++++++++++------ 4 files changed, 407 insertions(+), 237 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index fd66b81..79724cc 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ 
-2253,7 +2253,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay else: prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - print("inside bin ", time.time()-t_bin) + #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2266,7 +2266,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay else: img_bin = np.copy(img_resized) - print("inside 1 ", time.time()-t_in) + #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) @@ -2281,7 +2281,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #plt.imshwo(self.image_page_org_size) #plt.show() if not skip_layout_and_reading_order: - print("inside 2 ", time.time()-t_in) + #print("inside 2 ", time.time()-t_in) if not self.dir_in: if num_col_classifier == 1 or num_col_classifier >= 2: @@ -2309,7 +2309,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - print("inside 3 ", time.time()-t_in) + #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2395,7 +2395,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #plt.imshow(textline_mask_tot_ea) #plt.show() - print("inside 4 ", time.time()-t_in) + #print("inside 4 ", time.time()-t_in) return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) @@ -3368,7 +3368,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) - print(time.time()-t_0_box,'time box in 1') + #print(time.time()-t_0_box,'time box in 1') if self.tables: regions_without_separators[table_prediction ==1 ] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3381,7 +3381,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - print(time.time()-t_0_box,'time box in 2') + #print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) if num_col_classifier >= 3: @@ -3391,36 +3391,41 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - print(time.time()-t_0_box,'time box in 3') + #print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, 
peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) + #print(time.time()-t_0_box,'time box in 3.1') - text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + if self.tables: + text_regions_p_tables = np.copy(text_regions_p) + text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) + #print(time.time()-t_0_box,'time box in 3.2') + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + #print(time.time()-t_0_box,'time box in 3.3') else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) - text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 - - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) - - img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) - img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - print(time.time()-t_0_box,'time box in 4') + if self.tables: + text_regions_p_tables = np.copy(text_regions_p_1_n) + text_regions_p_tables =np.round(text_regions_p_tables) + text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 + + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) + + img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) + img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) + img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + 
#print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: @@ -3452,7 +3457,7 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - print(time.time()-t_0_box,'time box in 5') + #print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables @@ -4742,16 +4747,16 @@ def run(self): t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) - print("text region early -11 in %.1fs", time.time() - t0) + #print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - print("text region early -1 in %.1fs", time.time() - t0) + #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - print("text region early -2 in %.1fs", time.time() - t0) + #print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: @@ -4764,17 +4769,17 @@ def run(self): textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) - print("text region early -2,5 in %.1fs", time.time() - t0) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) - print("text region early -3 in %.1fs", time.time() - t0) + #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - print("text region early -4 in %.1fs", time.time() - t0) + #print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4795,7 +4800,7 @@ def run(self): continue else: return pcgts - print("text region early in %.1fs", time.time() - t0) + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -4837,7 +4842,7 @@ def run(self): image_page_rotated = resize_image(image_page_rotated,org_h_l_m, 
org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) - print("text region early 2 marginal in %.1fs", time.time() - t0) + #print("text region early 2 marginal in %.1fs", time.time() - t0) ## birdan sora chock chakir t1 = time.time() if not self.full_layout: @@ -4852,7 +4857,7 @@ def run(self): if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - print("text region early 2 in %.1fs", time.time() - t0) + #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -4974,19 +4979,20 @@ def run(self): else: pass - print("text region early 3 in %.1fs", time.time() - t0) + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - print("text region early 4 in %.1fs", time.time() - t0) + #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - print("text region early 5 in %.1fs", time.time() - t0) + #print("text region early 5 in %.1fs", time.time() - t0) ## birdan sora chock chakir if not self.curved_line: if self.light_version: @@ -5022,7 +5028,7 @@ def run(self): all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - print("text region early 6 in %.1fs", time.time() - t0) + #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) @@ -5182,7 +5188,7 @@ def run(self): self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts - print("text region early 7 in %.1fs", time.time() - t0) + #print("text region early 7 in %.1fs", time.time() - t0) else: _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) diff --git a/qurator/eynollah/utils/__init__.py 
b/qurator/eynollah/utils/__init__.py index 8705ecf..6219df2 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -7,7 +7,7 @@ import imutils from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d - +import time from .is_nan import isNaN from .contour import (contours_in_same_horizon, find_new_features_of_contours, @@ -1342,7 +1342,7 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point): return peaks_neg_tot def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): - + t_ins_c0 = time.time() separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1 separators_closeup[0:110,:,:]=0 @@ -1356,84 +1356,47 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, separators_closeup_new=np.zeros((separators_closeup.shape[0] ,separators_closeup.shape[1] )) - - - - ##_,separators_closeup_n=self.combine_hor_lines_and_delete_cross_points_and_get_lines_features_back(region_pre_p[:,:,0]) separators_closeup_n=np.copy(separators_closeup) separators_closeup_n=separators_closeup_n.astype(np.uint8) - ##plt.imshow(separators_closeup_n[:,:,0]) - ##plt.show() separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 - #separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==0]=255 - #separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==-255]=0 - - - #separators_closeup_n_binary=(separators_closeup_n_binary[:,:]==2)*1 - - #gray = cv2.cvtColor(separators_closeup_n, cv2.COLOR_BGR2GRAY) - - ### - - #print(separators_closeup_n_binary.shape) + gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) gray_early=gray_early.astype(np.uint8) - #print(gray_early.shape,'burda') imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY) - #print('burda2') ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0) - #print('burda3') contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - #slope_lines_e,dist_x_e, x_min_main_e ,x_max_main_e ,cy_main_e,slope_lines_org_e,y_min_main_e, y_max_main_e, cx_main_e=self.find_features_of_lines(contours_line_e) - slope_linese,dist_xe, x_min_maine ,x_max_maine ,cy_maine,slope_lines_orge,y_min_maine, y_max_maine, cx_maine=find_features_of_lines(contours_line_e) dist_ye=y_max_maine-y_min_maine - #print(y_max_maine-y_min_maine,'y') - #print(dist_xe,'x') args_e=np.array(range(len(contours_line_e))) args_hor_e=args_e[(dist_ye<=50) & (dist_xe>=3*dist_ye)] - #print(args_hor_e,'jidi',len(args_hor_e),'jilva') cnts_hor_e=[] for ce in args_hor_e: cnts_hor_e.append(contours_line_e[ce]) - #print(len(slope_linese),'lieee') figs_e=np.zeros(thresh_e.shape) figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1)) - #plt.imshow(figs_e) - #plt.show() - - ### - separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary,pts=cnts_hor_e,color=(0,0,0)) gray = cv2.bitwise_not(separators_closeup_n_binary) gray=gray.astype(np.uint8) - - #plt.imshow(gray) - #plt.show() - - bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, \ cv2.THRESH_BINARY, 15, -2) - ##plt.imshow(bw[:,:]) - ##plt.show() - + horizontal = np.copy(bw) vertical = np.copy(bw) @@ -1451,16 +1414,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, horizontal = 
cv2.dilate(horizontal,kernel,iterations = 2) horizontal = cv2.erode(horizontal,kernel,iterations = 2) - - ### - #print(np.unique(horizontal),'uni') horizontal=cv2.fillPoly(horizontal,pts=cnts_hor_e,color=(255,255,255)) - ### - - - - #plt.imshow(horizontal) - #plt.show() rows = vertical.shape[0] verticalsize = rows // 30 @@ -1471,35 +1425,21 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, vertical = cv2.dilate(vertical, verticalStructure) vertical = cv2.dilate(vertical,kernel,iterations = 1) - # Show extracted vertical lines horizontal,special_separators=combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(vertical,horizontal,num_col_classifier) - - #plt.imshow(horizontal) - #plt.show() - #print(vertical.shape,np.unique(vertical),'verticalvertical') separators_closeup_new[:,:][vertical[:,:]!=0]=1 separators_closeup_new[:,:][horizontal[:,:]!=0]=1 - ##plt.imshow(separators_closeup_new) - ##plt.show() - ##separators_closeup_n vertical=np.repeat(vertical[:, :, np.newaxis], 3, axis=2) vertical=vertical.astype(np.uint8) - ##plt.plot(vertical[:,:,0].sum(axis=0)) - ##plt.show() - - #plt.plot(vertical[:,:,0].sum(axis=1)) - #plt.show() - imgray = cv2.cvtColor(vertical, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_vers) - #print(slope_lines,'vertical') + args=np.array( range(len(slope_lines) )) args_ver=args[slope_lines==1] dist_x_ver=dist_x[slope_lines==1] @@ -1512,9 +1452,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, len_y=separators_closeup.shape[0]/3.0 - #plt.imshow(horizontal) - #plt.show() - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) horizontal=horizontal.astype(np.uint8) imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) @@ -1582,8 +1519,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, matrix_of_lines_ch[len(cy_main_hor):,9]=1 - - if contours_h is not None: slope_lines_head,dist_x_head, x_min_main_head ,x_max_main_head ,cy_main_head,slope_lines_org_head,y_min_main_head, y_max_main_head, cx_main_head=find_features_of_lines(contours_h) matrix_l_n=np.zeros((matrix_of_lines_ch.shape[0]+len(cy_main_head),matrix_of_lines_ch.shape[1])) @@ -1629,8 +1564,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, args_big_parts=np.array(range(len(splitter_y_new_diff))) [ splitter_y_new_diff>22 ] - - regions_without_separators=return_regions_without_separators(region_pre_p) @@ -1640,19 +1573,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, peaks_neg_fin_fin=[] for itiles in args_big_parts: - - regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:,0] - #image_page_background_zero_tile=image_page_background_zero[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:] - - #print(regions_without_separators_tile.shape) - ##plt.imshow(regions_without_separators_tile) - ##plt.show() - - #num_col, peaks_neg_fin=self.find_num_col(regions_without_separators_tile,multiplier=6.0) - - #regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3) - # + try: num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, 
tables, multiplier=7.0) except: @@ -1670,9 +1592,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, peaks_neg_fin=peaks_neg_fin[peaks_neg_fin<=(vertical.shape[1]-500)] peaks_neg_fin_fin=peaks_neg_fin[:] - #print(peaks_neg_fin_fin,'peaks_neg_fin_fintaza') - - return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n diff --git a/qurator/eynollah/utils/contour.py b/qurator/eynollah/utils/contour.py index 53b39b5..8a92ace 100644 --- a/qurator/eynollah/utils/contour.py +++ b/qurator/eynollah/utils/contour.py @@ -263,7 +263,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): return cnts_org -def get_textregion_contours_in_org_image_light(cnts, img, slope_first): +def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): h_o = img.shape[0] w_o = img.shape[1] @@ -278,14 +278,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first): img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) - # plt.imshow(img_copy) - # plt.show() - - # print(img.shape,'img') img_copy = rotation_image_new(img_copy, -slope_first) - ##print(img_copy.shape,'img_copy') - # plt.imshow(img_copy) - # plt.show() img_copy = img_copy.astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) @@ -300,6 +293,70 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first): return cnts_org +def return_list_of_contours_with_desired_order(ls_cons, sorted_indexes): + return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] +def do_back_rotation_and_get_cnt_back(queue_of_all_params, contours_par_per_process,indexes_r_con_per_pro, img, slope_first): + contours_textregion_per_each_subprocess = [] + index_by_text_region_contours = [] + for mv in range(len(contours_par_per_process)): + img_copy = np.zeros(img.shape) + img_copy = cv2.fillPoly(img_copy, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) + + img_copy = rotation_image_new(img_copy, -slope_first) + + img_copy = img_copy.astype(np.uint8) + imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) + # print(np.shape(cont_int[0])) + contours_textregion_per_each_subprocess.append(cont_int[0]*6) + index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) + + queue_of_all_params.put([contours_textregion_per_each_subprocess, index_by_text_region_contours]) + +def get_textregion_contours_in_org_image_light(cnts, img, slope_first): + num_cores = cpu_count() + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(cnts), num_cores + 1) + indexes_by_text_con = np.array(range(len(cnts))) + + h_o = img.shape[0] + w_o = img.shape[1] + + img = cv2.resize(img, (int(img.shape[1]/6.), int(img.shape[0]/6.)), interpolation=cv2.INTER_NEAREST) + ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) + #cnts = cnts/2 + cnts = [(i/ 6).astype(np.int32) for i in cnts] + + for i in range(num_cores): + contours_par_per_process = cnts[int(nh[i]) : int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_back_rotation_and_get_cnt_back, args=(queue_of_all_params, contours_par_per_process, 
indexes_text_con_per_process, img, slope_first))) + + for i in range(num_cores): + processes[i].start() + + cnts_org = [] + all_index_text_con = [] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + contours_for_subprocess = list_all_par[0] + indexes_for_subprocess = list_all_par[1] + for j in range(len(contours_for_subprocess)): + cnts_org.append(contours_for_subprocess[j]) + all_index_text_con.append(indexes_for_subprocess[j]) + for i in range(num_cores): + processes[i].join() + + cnts_org = return_list_of_contours_with_desired_order(cnts_org, all_index_text_con) + + return cnts_org + def return_contours_of_interested_textline(region_pre_p, pixel): # pixels of images are identified by 5 diff --git a/qurator/eynollah/utils/separate_lines.py b/qurator/eynollah/utils/separate_lines.py index 1004a92..f8df33f 100644 --- a/qurator/eynollah/utils/separate_lines.py +++ b/qurator/eynollah/utils/separate_lines.py @@ -3,7 +3,8 @@ from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d import os - +from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Pool from .rotate import rotate_image from .contour import ( return_parent_contours, @@ -1569,8 +1570,21 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): +def do_image_rotation(queue_of_all_params,angels_per_process, img_resized, sigma_des): + angels_per_each_subprocess = [] + for mv in range(len(angels_per_process)): + img_rot=rotate_image(img_resized,angels_per_process[mv]) + img_rot[img_rot!=0]=1 + try: + var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + except: + var_spectrum=0 + angels_per_each_subprocess.append(var_spectrum) + + queue_of_all_params.put([angels_per_each_subprocess]) +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): + num_cores = cpu_count() if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1603,22 +1617,44 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ##print(rot,var_spectrum,'var_spectrum') - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ###img_rot[img_rot!=0]=1 + 
####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(var_spectrum,'var_spectrum') + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + #####print(rot,var_spectrum,'var_spectrum') + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) + + + try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1628,17 +1664,38 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ##var_res=[] + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ##img_rot[img_rot!=0]=1 + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##except: + ##var_spectrum=0 + ##var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1650,24 +1707,46 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) + + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + + var_res=[] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() - var_res=[] + ##var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ###plt.imshow(img_rot) + ###plt.show() + ##img_rot[img_rot!=0]=1 + ###neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + ###print(var_spectrum,'var_spectrum') + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 + ##except: + ##var_spectrum=0 - var_res.append(var_spectrum) 
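            # Each angle sweep in this function now follows the same fan-out
            # pattern (as does get_textregion_contours_in_org_image_light above):
            # the candidate angles are cut into cpu_count() slices, every slice is
            # scored by do_image_rotation in its own Process, and the per-slice
            # scores come back through one shared Queue. A condensed,
            # self-contained sketch of the pattern (score and parallel_scores are
            # illustrative stand-ins, not from this patch; unlike the patch, the
            # sketch also returns a slice index, since Queue.get hands back
            # slices in completion order rather than submission order):
            #
            #   from multiprocessing import Process, Queue, cpu_count
            #   import numpy as np
            #
            #   def score(q, idx, angles):
            #       q.put((idx, [abs(a) for a in angles]))    # stand-in scoring
            #
            #   def parallel_scores(angles):
            #       q, procs = Queue(), []
            #       bounds = np.linspace(0, len(angles), cpu_count() + 1).astype(int)
            #       for k, (lo, hi) in enumerate(zip(bounds[:-1], bounds[1:])):
            #           procs.append(Process(target=score, args=(q, k, angles[lo:hi])))
            #       for p in procs: p.start()
            #       parts = dict(q.get(True) for _ in procs)  # drain before join
            #       for p in procs: p.join()
            #       return [v for k in sorted(parts) for v in parts[k]]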
+ ##var_res.append(var_spectrum) if plotter: @@ -1681,17 +1760,38 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals early_slope_edge=11 if abs(ang_int)>early_slope_edge and ang_int<0: angels=np.linspace(-90,-12,n_tot_angles) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + ##var_res=[] + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ##img_rot[img_rot!=0]=1 + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##except: + ##var_spectrum=0 + ##var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1701,18 +1801,41 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals elif abs(ang_int)>early_slope_edge and ang_int>0: angels=np.linspace(90,12,n_tot_angles) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(indexer,'indexer') - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + + ###var_res=[] + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(indexer,'indexer') + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1720,20 +1843,42 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals ang_int=0 else: angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) - var_res=[] indexer=0 - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - 
#neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + + var_res=[] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + ####var_res=[] + + ####for rot in angels: + ####img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ####img_rot[img_rot!=0]=1 + #####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + #####print(var_spectrum,'var_spectrum') + ####try: + ####var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####except: + ####var_spectrum=0 + ####var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1750,19 +1895,40 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals if abs(ang_int)>early_slope_edge and ang_int<0: angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) - + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ###var_res=[] + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) @@ -1773,22 +1939,44 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals elif abs(ang_int)>early_slope_edge and ang_int>0: angels=np.linspace(90,25,int(n_tot_angles/2.)+10) - + indexer=0 + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] + for i in range(num_cores): + list_all_par = 
queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() - indexer=0 - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(indexer,'indexer') - except: - var_spectrum=0 + ###var_res=[] - var_res.append(var_spectrum) + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(indexer,'indexer') + ###except: + ###var_spectrum=0 + + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] From 70772d41042df2415a0918d99f51cb183db36fe5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 21 Oct 2024 23:46:38 +0200 Subject: [PATCH 44/64] binarization as a standalone command --- qurator/eynollah/cli.py | 33 +++ qurator/eynollah/eynollah.py | 5 +- qurator/eynollah/sbb_binarize.py | 383 +++++++++++++++++++++++++++++++ 3 files changed, 418 insertions(+), 3 deletions(-) create mode 100644 qurator/eynollah/sbb_binarize.py diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 4c762a8..0daf0c9 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -2,6 +2,7 @@ import click from ocrd_utils import initLogging, setOverrideLogLevel from qurator.eynollah.eynollah import Eynollah +from qurator.eynollah.sbb_binarize import SbbBinarizer @click.group() def main(): @@ -48,6 +49,38 @@ def main(): def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): xml_files_ind = os.listdir(dir_xml) +@main.command() +@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') + +@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') + +@click.argument('input_image') + +@click.argument('output_image') +@click.option( + "--dir_in", + "-di", + help="directory of images", + type=click.Path(exists=True, file_okay=False), +) +@click.option( + "--dir_out", + "-do", + help="directory where the binarized images will be written", + type=click.Path(exists=True, file_okay=False), +) + +def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out): + if not dir_out and (dir_in): + print("Error: You used -di but did not set -do") + sys.exit(1) + elif dir_out and not (dir_in): + print("Error: You used -do to write out binarized images but have not set -di") + sys.exit(1) + SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out) + + + @main.command() @click.option( diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 79724cc..e587ff3 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -240,7 +240,6 @@ def __init__( pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models - self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + 
"/eynollah-binarization_20210425" self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" @@ -4769,9 +4768,9 @@ def run(self): textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ diff --git a/qurator/eynollah/sbb_binarize.py b/qurator/eynollah/sbb_binarize.py new file mode 100644 index 0000000..36e9ab0 --- /dev/null +++ b/qurator/eynollah/sbb_binarize.py @@ -0,0 +1,383 @@ +""" +Tool to load model and binarize a given image. +""" + +import sys +from glob import glob +from os import environ, devnull +from os.path import join +from warnings import catch_warnings, simplefilter +import os + +import numpy as np +from PIL import Image +import cv2 +environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +stderr = sys.stderr +sys.stderr = open(devnull, 'w') +import tensorflow as tf +from tensorflow.keras.models import load_model +from tensorflow.python.keras import backend as tensorflow_backend +sys.stderr = stderr + + +import logging + +def resize_image(img_in, input_height, input_width): + return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + +class SbbBinarizer: + + def __init__(self, model_dir, logger=None): + self.model_dir = model_dir + self.log = logger if logger else logging.getLogger('SbbBinarizer') + + self.start_new_session() + + self.model_files = glob(self.model_dir+"/*/", recursive = True) + + self.models = [] + for model_file in self.model_files: + self.models.append(self.load_model(model_file)) + + def start_new_session(self): + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True + + self.session = tf.compat.v1.Session(config=config) # tf.InteractiveSession() + tensorflow_backend.set_session(self.session) + + def end_session(self): + tensorflow_backend.clear_session() + self.session.close() + del self.session + + def load_model(self, model_name): + model = load_model(join(self.model_dir, model_name), compile=False) + model_height = model.layers[len(model.layers)-1].output_shape[1] + model_width = model.layers[len(model.layers)-1].output_shape[2] + n_classes = model.layers[len(model.layers)-1].output_shape[3] + return model, model_height, model_width, n_classes + + def predict(self, model_in, img, use_patches, n_batch_inference=5): + tensorflow_backend.set_session(self.session) + model, model_height, model_width, n_classes = model_in + + img_org_h = img.shape[0] + img_org_w = img.shape[1] + + if img.shape[0] < model_height and img.shape[1] >= model_width: + img_padded = np.zeros(( model_height, img.shape[1], img.shape[2] )) + + index_start_h = int( abs( img.shape[0] - model_height) /2.) 
+ index_start_w = 0 + + img_padded [ index_start_h: index_start_h+img.shape[0], :, : ] = img[:,:,:] + + elif img.shape[0] >= model_height and img.shape[1] < model_width: + img_padded = np.zeros(( img.shape[0], model_width, img.shape[2] )) + + index_start_h = 0 + index_start_w = int( abs( img.shape[1] - model_width) /2.) + + img_padded [ :, index_start_w: index_start_w+img.shape[1], : ] = img[:,:,:] + + + elif img.shape[0] < model_height and img.shape[1] < model_width: + img_padded = np.zeros(( model_height, model_width, img.shape[2] )) + + index_start_h = int( abs( img.shape[0] - model_height) /2.) + index_start_w = int( abs( img.shape[1] - model_width) /2.) + + img_padded [ index_start_h: index_start_h+img.shape[0], index_start_w: index_start_w+img.shape[1], : ] = img[:,:,:] + + else: + index_start_h = 0 + index_start_w = 0 + img_padded = np.copy(img) + + + img = np.copy(img_padded) + + + + if use_patches: + + margin = int(0.1 * model_width) + + width_mid = model_width - 2 * margin + height_mid = model_height - 2 * margin + + + img = img / float(255.0) + + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + if nyf > int(nyf): + nyf = int(nyf) + 1 + else: + nyf = int(nyf) + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + for i in range(nxf): + for j in range(nyf): + + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + model_width + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + model_width + + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + model_height + elif j > 0: + index_y_d = j * height_mid + index_y_u = index_y_d + model_height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - model_width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - model_height + + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + + + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + + + if batch_indexer == n_batch_inference: + + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + #print(seg.shape, len(seg), len(list_i_s)) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + 
seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + #print(seg.shape, len(seg), len(list_i_s)) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + 
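# right-column / top-row tile: keep the outer top and right edges,
+ # trim the overlap margin from the bottom and left before stitching
+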
prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + + + prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:] + prediction_true = prediction_true.astype(np.uint8) + + else: + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / float(255.0) + img = resize_image(img, model_height, model_width) + + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = resize_image(seg_color, img_h_page, img_w_page) + prediction_true = prediction_true.astype(np.uint8) + return prediction_true[:,:,0] + + def run(self, image=None, image_path=None, save=None, use_patches=False, dir_in=None, dir_out=None): + print(dir_in,'dir_in') + if not dir_in: + if (image is not None and image_path is not None) or \ + (image is None and image_path is None): + raise ValueError("Must pass either a opencv2 image or an image_path") + if image_path is not None: + image = cv2.imread(image_path) + img_last = 0 + for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): + self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) + + res = self.predict(model, image, use_patches) + + img_fin = np.zeros((res.shape[0], res.shape[1], 3)) + res[:, :][res[:, :] == 0] = 2 + res = res - 1 + res = res * 255 + img_fin[:, :, 0] = res + img_fin[:, :, 1] = res + img_fin[:, :, 2] = res + + img_fin = img_fin.astype(np.uint8) + img_fin = (res[:, :] == 0) * 255 + img_last = img_last + img_fin + + kernel = np.ones((5, 5), np.uint8) + img_last[:, :][img_last[:, :] > 0] = 255 + img_last = (img_last[:, :] == 0) * 255 + if save: + cv2.imwrite(save, img_last) + return img_last + else: + ls_imgs = os.listdir(dir_in) + 
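# directory mode: run the whole model ensemble over every image found in
+ # dir_in and write one binarized PNG per input image into dir_out
+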
for image_name in ls_imgs:
+ image_stem = image_name.split('.')[0]
+ print(image_name,'image_name')
+ image = cv2.imread(os.path.join(dir_in,image_name) )
+ img_last = 0
+ for n, (model, model_file) in enumerate(zip(self.models, self.model_files)):
+ self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files)))
+
+ res = self.predict(model, image, use_patches)
+
+ img_fin = np.zeros((res.shape[0], res.shape[1], 3))
+ res[:, :][res[:, :] == 0] = 2
+ res = res - 1
+ res = res * 255
+ img_fin[:, :, 0] = res
+ img_fin[:, :, 1] = res
+ img_fin[:, :, 2] = res
+
+ img_fin = img_fin.astype(np.uint8)
+ img_fin = (res[:, :] == 0) * 255
+ img_last = img_last + img_fin
+
+ kernel = np.ones((5, 5), np.uint8)
+ img_last[:, :][img_last[:, :] > 0] = 255
+ img_last = (img_last[:, :] == 0) * 255
+
+ cv2.imwrite(os.path.join(dir_out,image_stem+'.png'), img_last)

From 328d33e3dc294b4d93fcdca833ed679ee0169f9f Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 23 Oct 2024 16:55:41 +0200
Subject: [PATCH 45/64] =?UTF-8?q?Temporary=20commit=20=E2=80=93=20textline?=
 =?UTF-8?q?=20prediction=20without=20patches?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 qurator/eynollah/eynollah.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index e587ff3..6ee3dc7 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -2120,7 +2120,7 @@ def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier
 else:
 thresholding_for_artificial_class_in_light_version = False
 if not self.dir_in:
- model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np)
+ model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir)
 #img = img.astype(np.uint8)
 img_org = np.copy(img)
 img_h = img_org.shape[0]
@@ -3311,7 +3311,8 @@ def run_textline(self, image_page, num_col_classifier=None):
 scaler_h_textline = 1#1.3 # 1.2#1.2
 scaler_w_textline = 1#1.3 # 0.9#1
 #print(image_page.shape)
- textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier)
+ patches = False
+ textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier)
 if self.textline_light:
 textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16)

From 82281bd6cfa218e7e434fe8da535fae394d5f59c Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Fri, 25 Oct 2024 19:42:48 +0200
Subject: [PATCH 46/64] fixing a bug occurring with reading order + Slro option
 with no-patch textline model and thresholding of the artificial class

---
 qurator/eynollah/eynollah.py | 79 +++++++++++++++++-------------
 qurator/eynollah/utils/__init__.py | 21 ++++----
 qurator/eynollah/utils/xml.py | 2 +-
 3 files changed, 58 insertions(+), 44 deletions(-)

diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index e587ff3..03252fb 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -245,7 +245,7 @@ def __init__(
 self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425"
 self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425"
 self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425"
- self.model_region_dir_fully_np = dir_models +
"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -253,11 +253,11 @@ def __init__( self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -816,6 +816,14 @@ def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_pa verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[0,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) @@ -1546,7 +1554,7 @@ def extract_text_regions_new(self, img, patches, cols): pass else: img = otsu_copy_binary(img) - img = img.astype(np.uint8) + #img = img.astype(np.uint8) prediction_regions2 = None else: if cols == 1: @@ -1605,9 +1613,12 @@ def extract_text_regions_new(self, img, patches, cols): img = img.astype(np.uint8) marginal_of_patch_percent = 0.1 - + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) + + ##prediction_regions = self.do_prediction(False, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) + prediction_regions = resize_image(prediction_regions, 
img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions @@ -2148,7 +2159,7 @@ def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier if not thresholding_for_artificial_class_in_light_version: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') - textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) + #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 @@ -2245,26 +2256,27 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): - if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - - #print("inside bin ", time.time()-t_bin) - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - prediction_bin = prediction_bin.astype(np.uint16) - #img= np.copy(prediction_bin) - img_bin = np.copy(prediction_bin) - else: - img_bin = np.copy(img_resized) - + ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): + ###if not self.dir_in: + ###model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + ###prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + ###else: + ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + + ####print("inside bin ", time.time()-t_bin) + ###prediction_bin=prediction_bin[:,:,0] + ###prediction_bin = (prediction_bin[:,:]==0)*1 + ###prediction_bin = prediction_bin*255 + + ###prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + ###prediction_bin = prediction_bin.astype(np.uint16) + ####img= np.copy(prediction_bin) + ###img_bin = np.copy(prediction_bin) + ###else: + ###img_bin = np.copy(img_resized) + + img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) @@ -3311,7 +3323,8 @@ def run_textline(self, image_page, num_col_classifier=None): scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) + patches = False + textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3564,9 +3577,9 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s image_page = image_page.astype(np.uint8) #print("full inside 1", time.time()- t_full0) if self.light_version: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) + regions_fully, regions_fully_only_drop = 
self.extract_text_regions_new(img_bin_light, False, cols=num_col_classifier)
 else:
- regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier)
+ regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, False, cols=num_col_classifier)
 #print("full inside 2", time.time()- t_full0)
 # 6 is the separators label in old full layout model
 # 4 is the drop capital class in old full layout model
@@ -3590,7 +3603,7 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s
 regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model
- regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model)
+ ##regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model)
 ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
 ##if num_col_classifier > 2:
 ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
@@ -4768,9 +4781,9 @@ def run(self):
 textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
- slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew)
+ slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew)
 else:
- slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea)
+ slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
 #print("text region early -2,5 in %.1fs", time.time() - t0)
 #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
 num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \
diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py
index 6219df2..e7cbbea 100644
--- a/qurator/eynollah/utils/__init__.py
+++ b/qurator/eynollah/utils/__init__.py
@@ -1204,17 +1204,12 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
 top = peaks_neg_new[i]
 down = peaks_neg_new[i + 1]
- # print(top,down,'topdown')
 indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
 cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
 cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
 types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
 index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))]
- # print(top,down)
- # print(cys_in,'cyyyins')
- # print(indexes_in,'indexes')
 sorted_inside = np.argsort(cxs_in)
 ind_in_int = indexes_in[sorted_inside]
@@ -1228,11 +1223,17 @@
 ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]
- # print(peaks_neg_new,'peaks')
- # print(final_indexers_sorted,'indexsorted')
- # print(final_types,'types')
- # print(final_index_type,'final_index_type')
-
+ # This fix is applied if the sum of the lengths of contours and contours_h does not match final_indexers_sorted. However, this is not an optimal solution.
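+ # (any region index that never fell between two consecutive vertical peaks
+ # is recovered by the set difference below and appended to the end of the
+ # reading order, so the caller always receives one index per region)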
+ if (len(cy_main)+len(cy_header) ) == len(final_index_type): + pass + else: + indexes_missed = set(list( np.array( range((len(cy_main)+len(cy_header) ) )) )) - set(final_indexers_sorted) + for ind_missed in indexes_missed: + final_indexers_sorted.append(ind_missed) + final_types.append(1) + final_index_type.append(ind_missed) + + return final_indexers_sorted, matrix_of_orders, final_types, final_index_type def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier): diff --git a/qurator/eynollah/utils/xml.py b/qurator/eynollah/utils/xml.py index 0386b25..bd95702 100644 --- a/qurator/eynollah/utils/xml.py +++ b/qurator/eynollah/utils/xml.py @@ -72,7 +72,7 @@ def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region index_of_types_2 = index_of_types[kind_of_texts == 2] indexes_sorted_2 = indexes_sorted[kind_of_texts == 2] - + counter = EynollahIdCounter(region_idx=ref_point) for idx_textregion, _ in enumerate(found_polygons_text_region): id_of_texts.append(counter.next_region_id) From 90ee2d61dc1d2ce05724d6d0f11c200ba1709108 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 28 Oct 2024 20:56:06 +0100 Subject: [PATCH 47/64] textline segmentation is masked with drop capitals --- qurator/eynollah/eynollah.py | 223 +++++++++++++++++++++-------------- 1 file changed, 135 insertions(+), 88 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 1cb00c7..d0a8299 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -245,7 +245,7 @@ def __init__( self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -253,11 +253,11 @@ def __init__( self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - 
self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -502,7 +502,8 @@ def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_p if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) num_column_is_classified = False - elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: img_new = np.copy(img) num_column_is_classified = False else: @@ -523,7 +524,8 @@ def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) num_column_is_classified = False - elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: img_new = np.copy(img) num_column_is_classified = False else: @@ -3323,7 +3325,7 @@ def run_textline(self, image_page, num_col_classifier=None): scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - patches = False + patches = True textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3634,6 +3636,7 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s regions_without_separators = (text_regions_p[:, :] == 1) * 1 img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) + self.logger.debug('exit run_boxes_full_layout') #print("full inside 3", time.time()- t_full0) return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables @@ -4169,7 +4172,123 @@ def return_it_in_two_groups(self,x_differential): x_differential_new[split_masked[i]:split_masked[i+1]] = -1*np.array(x_differential)[split_masked[i]:split_masked[i+1]] return x_differential_new - + def dilate_textregions_contours_textline_version(self,all_found_textline_polygons): + #print(all_found_textline_polygons) + + for j in range(len(all_found_textline_polygons)): + for ij in range(len(all_found_textline_polygons[j])): + + con_ind = all_found_textline_polygons[j][ij] + area = cv2.contourArea(con_ind) + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + 
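# dx/dy along the contour approximate the local tangent; each vertex is
+ # later shifted along (-dy, dx), i.e. roughly the outward normal, to
+ # dilate the polygon
+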
y_differential = np.diff( con_ind[:,0,1]) + + + x_differential = gaussian_filter1d(x_differential, 0.1) + y_differential = gaussian_filter1d(y_differential, 0.1) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.12) + else: + dilation_m1 = round(area / (y_max-y_min) * 0.12) + + if dilation_m1>8: + dilation_m1 = 8 + if dilation_m1<6: + dilation_m1 = 6 + #print(dilation_m1, 'dilation_m1') + dilation_m1 = 6 + dilation_m2 = int(dilation_m1/2.) +1 + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + #print(results,'results') + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + #indices_2 = indices_2[1:] + indices_m2 = indices_m2[1:] + + + + if len(indices_2)>len(indices_m2): + con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] + con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + + indices_2 = indices_2[:-1] + + + for ii in range(len(indices_2)): + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + + + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons def 
dilate_textregions_contours(self,all_found_textline_polygons): #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): @@ -4179,9 +4298,6 @@ def dilate_textregions_contours(self,all_found_textline_polygons): area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) - #con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.5) - #con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.5) - x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) @@ -4235,29 +4351,6 @@ def dilate_textregions_contours(self,all_found_textline_polygons): inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - ###for i in range(len(x_differential)): - ###if abs_diff[i]==0: - ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) - ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) - ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) - - ###elif abs_diff[i]!=0 and abs_diff[i]>=3: - ###if abs(x_differential[i])>abs(y_differential[i]): - ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) - ###else: - ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) - ###else: - ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) - - ###inc_x =list(inc_x) - ###inc_x.append(inc_x[0]) - - ###inc_y =list(inc_y) - ###inc_y.append(inc_y[0]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -4288,21 +4381,6 @@ def dilate_textregions_contours(self,all_found_textline_polygons): indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - #print(area_scaled / area, "ratio") - #print(results,'results') - #if results[0]==1 and diff_result[-1]==-2: - ##indices_2 = indices_2[1:] - ##indices_m2 = indices_m2[1:] - - #con_scaled[:indices_m2[0]+1,0, 1] = con_scaled[indices_m2[-1],0, 1] - #con_scaled[:indices_m2[0]+1,0, 0] = con_scaled[indices_m2[-1],0, 0] - - - #con_scaled[indices_2[-1]+1:,0, 1] = con_scaled[indices_m2[-1],0, 1] - #con_scaled[indices_2[-1]+1:,0, 0] = con_scaled[indices_m2[-1],0, 0] - - #indices_2 = indices_2[:-1] - #indices_m2 = indices_m2[1:-1] if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] @@ -4318,50 +4396,12 @@ def dilate_textregions_contours(self,all_found_textline_polygons): indices_2 = indices_2[:-1] - - - #diff_neg_pos = np.array(indices_m2) - np.array(indices_2) - - - #print(diff_neg_pos,'diff') - ##print(indices_2, 'indices_2') - #indices_2 = np.array(indices_2)[diff_neg_pos>1] - #indices_m2 = np.array(indices_m2)[diff_neg_pos>1] for ii in range(len(indices_2)): - #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0] - #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1] - - #if x_inner[-1]>=x_inner[0]: - #x_interest = np.min(x_inner) - #else: - #x_interest = np.max(x_inner) - - #if y_inner[-1]>=y_inner[0]: - #y_interest = np.min(y_inner) - #else: - #y_interest = np.max(y_inner) - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - #con_scaled[:,0, 1][results[:]>0] = con_ind[:,0,1][results[:]>0] - #con_scaled[:,0, 
0][results[:]>0] = con_ind[:,0,0][results[:]>0] - - #print(list(results), 'results') - #print(list(diff_result), 'diff_result') - #print(indices_2,'2') - #print(indices_m2,'-2') - #print(diff_neg_pos,'diff_neg_pos') - - ##con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) - ##con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) - - #con_scaled[-1,0, 1] = con_scaled[0,0, 1] - #con_scaled[-1,0, 0] = con_scaled[0,0, 0] - ##print(len(con_scaled[:,0,0]),'con_scaled[:,0,0]') all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons @@ -4865,6 +4905,12 @@ def run(self): img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + + text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -5018,7 +5064,8 @@ def run(self): #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) From 438df5228705e93f52d43a17a9284cc199fb97f4 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Oct 2024 00:52:09 +0100 Subject: [PATCH 48/64] updating --- qurator/eynollah/eynollah.py | 8 +++++--- qurator/eynollah/utils/__init__.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index d0a8299..543ed92 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1726,6 +1726,7 @@ def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) + M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] @@ -3605,7 +3606,7 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s 
regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model
- ##regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model)
+ regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model)
 ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
 ##if num_col_classifier > 2:
 ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
@@ -4901,6 +4902,7 @@ def run(self):
 polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts)
 ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
 if self.full_layout:
+ cv2.imwrite('dewar_page.png', image_page)
 if not self.light_version:
 img_bin_light = None
 polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light)
@@ -5067,7 +5069,7 @@ def run(self):
 #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
 all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
 all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
- all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals)
+ all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals)
 else:
 textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
@@ -5261,7 +5263,7 @@ def run(self):
 all_found_textline_polygons=[ all_found_textline_polygons ]
- all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
+ all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
 all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py
index e7cbbea..29f80b4 100644
--- a/qurator/eynollah/utils/__init__.py
+++ b/qurator/eynollah/utils/__init__.py
@@ -792,7 +792,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
 for jj in range(len(contours_drop_parent)):
 x, y, w, h = cv2.boundingRect(contours_drop_parent[jj])
- if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.8:
+ if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4:
 layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label
 else:

From e796a99c5cae651ae1601f2033feecd695b382f2 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 30 Oct 2024 15:02:50 +0100
Subject: [PATCH 49/64] updating inference for early layout for documents with
 more than two columns

---
 qurator/eynollah/eynollah.py
| 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 543ed92..0a1c2b1 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2296,9 +2296,8 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay #plt.show() if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) - if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier >= 2: + if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) @@ -2307,12 +2306,12 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - if num_col_classifier == 1 or num_col_classifier >= 2: + if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) else: @@ -2320,7 +2319,7 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) From 751b0102f7787f2ab8a45e3ecc4604e7c107e1e6 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 5 Nov 2024 19:50:18 +0100 Subject: 
[PATCH 50/64] updating early layout inference for light version --- qurator/eynollah/eynollah.py | 37 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 0a1c2b1..9095c15 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -245,7 +245,7 @@ def __init__( self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -253,7 +253,7 @@ def __init__( self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: @@ -743,7 +743,7 @@ def get_image_and_scales(self, img_org, img_res, scale): def get_image_and_scales_after_enhancing(self, img_org, img_res): self.logger.debug("enter get_image_and_scales_after_enhancing") self.image = np.copy(img_res) - #self.image = self.image.astype(np.uint8) + self.image = self.image.astype(np.uint8) self.image_org = np.copy(img_org) self.height_org = self.image_org.shape[0] self.width_org = self.image_org.shape[1] @@ -1298,20 +1298,25 @@ def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, ma seg = np.argmax(label_p_pred, axis=3) if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 + + seg_art = label_p_pred[:,:,:,4] + 
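# take channel 4 of the softmax output and threshold it at 0.2 directly,
+ # instead of relying on argmax alone, before stamping it into seg below
+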
seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + ###seg[seg_art==1]=4 + ##seg_not_base = label_p_pred[:,:,:,4] + ##seg_not_base[seg_not_base>0.03] =1 + ##seg_not_base[seg_not_base<1] =0 seg_line = label_p_pred[:,:,:,3] seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 + ##seg_background = label_p_pred[:,:,:,0] + ##seg_background[seg_background>0.25] =1 + ##seg_background[seg_background<1] =0 - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 + seg[seg_art==1]=4 + ##seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 if thresholding_for_artificial_class_in_light_version: seg_art = label_p_pred[:,:,:,2] @@ -2300,26 +2305,26 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_some_classes_in_light_version=True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: 
- prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2)
+ prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True)
 ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
 #print("inside 3 ", time.time()-t_in)
@@ -4595,7 +4600,7 @@ def filter_contours_inside_a_bigger_one(self,contours, image, marginal_cnts=None
 areas_without = np.array(areas_tot)[args_all]
 area_of_con_interest = areas_tot[ij]
- args_with_bigger_area = np.array(args_all)[areas_without > area_of_con_interest]
+ args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest]
 if len(args_with_bigger_area)>0:
 results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ]

From 8409de0e58457f2ae4661a42ce8942e96794f2e8 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Sun, 10 Nov 2024 19:34:43 +0100
Subject: [PATCH 51/64] integrate sbb_binarization into eynollah so that it
 works in the OCR-D framework; ocrd-sbb-binarize binarizes individual images,
 and the standalone tool can now also process a whole directory. For eynollah
 in the OCR-D framework, the light version is now the default parameter.

---
 pyproject.toml | 1 +
 src/eynollah/eynollah.py | 1 -
 src/eynollah/ocrd-tool-binarization.json | 47 +++++++
 src/eynollah/ocrd-tool.json | 10 ++
 src/eynollah/ocrd_cli_binarization.py | 158 +++++++++++++++++++++++
 src/eynollah/processor.py | 2 +
 6 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 src/eynollah/ocrd-tool-binarization.json
 create mode 100644 src/eynollah/ocrd_cli_binarization.py

diff --git a/pyproject.toml b/pyproject.toml
index 67a420d..b056cb7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,7 @@ classifiers = [
 [project.scripts]
 eynollah = "eynollah.cli:main"
 ocrd-eynollah-segment = "eynollah.ocrd_cli:main"
+ocrd-sbb-binarize = "eynollah.ocrd_cli_binarization:cli"
 [project.urls]
 Homepage = "https://github.com/qurator-spk/eynollah"
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 0d0d683..29d2788 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -4964,7 +4964,6 @@ def run(self):
 polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts)
 ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals)
 if self.full_layout:
- cv2.imwrite('dewar_page.png', image_page)
 if not self.light_version:
 img_bin_light = None
 polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light)
diff --git a/src/eynollah/ocrd-tool-binarization.json b/src/eynollah/ocrd-tool-binarization.json new file mode 100644 index 0000000..1711e89 --- /dev/null +++ b/src/eynollah/ocrd-tool-binarization.json @@ -0,0 +1,47 @@ +{ + "version": "0.1.0", + "git_url": "https://github.com/qurator-spk/sbb_binarization", + "tools": { + "ocrd-sbb-binarize": { + "executable": "ocrd-sbb-binarize", + "description": "Pixelwise binarization with selectional auto-encoders in Keras", + "categories": ["Image preprocessing"], + "steps": ["preprocessing/optimization/binarization"], + "input_file_grp": [], + "output_file_grp": [], + "parameters": { + "operation_level": { + "type": "string", + "enum": ["page", "region"], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "model": { + "description": "Directory containing HDF5 or SavedModel/ProtoBuf models. Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", + "type": "string", + "format": "uri", + "content-type": "text/directory", + "required": true + } + }, + "resources": [ + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2020_01_16.zip", + "name": "default", + "type": "archive", + "path_in_archive": "saved_model_2020_01_16", + "size": 563147331, + "description": "default models provided by github.com/qurator-spk (SavedModel format)" + }, + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip", + "name": "default-2021-03-09", + "type": "archive", + "path_in_archive": ".", + "size": 133230419, + "description": "updated default models provided by github.com/qurator-spk (SavedModel format)" + } + ] + } + } +} diff --git a/src/eynollah/ocrd-tool.json b/src/eynollah/ocrd-tool.json index b840005..9eb8932 100644 --- a/src/eynollah/ocrd-tool.json +++ b/src/eynollah/ocrd-tool.json @@ -28,6 +28,16 @@ "type": "boolean", "default": true, "description": "Try to detect all element subtypes, including drop-caps and headings" + }, + "light_version": { + "type": "boolean", + "default": true, + "description": "Try to detect all element subtypes in light version" + }, + "textline_light": { + "type": "boolean", + "default": true, + "description": "Light version need textline light" }, "tables": { "type": "boolean", diff --git a/src/eynollah/ocrd_cli_binarization.py b/src/eynollah/ocrd_cli_binarization.py new file mode 100644 index 0000000..6a8bbdc --- /dev/null +++ b/src/eynollah/ocrd_cli_binarization.py @@ -0,0 +1,158 @@ +from os import environ +from os.path import join +from pathlib import Path +from pkg_resources import resource_string +from json import loads + +from PIL import Image +import numpy as np +import cv2 +from click import command + +from ocrd_utils import ( + getLogger, + assert_file_grp_cardinality, + make_file_id, + MIMETYPE_PAGE +) +from ocrd import Processor +from ocrd_modelfactory import page_from_file +from ocrd_models.ocrd_page import AlternativeImageType, to_xml +from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor + +from .sbb_binarize import SbbBinarizer + +OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool-binarization.json').decode('utf8')) +TOOL = 'ocrd-sbb-binarize' + +def cv2pil(img): + return Image.fromarray(img.astype('uint8')) + +def pil2cv(img): + # from ocrd/workspace.py + color_conversion = cv2.COLOR_GRAY2BGR if img.mode in ('1', 'L') else cv2.COLOR_RGB2BGR + pil_as_np_array = 
np.array(img).astype('uint8') if img.mode == '1' else np.array(img) + return cv2.cvtColor(pil_as_np_array, color_conversion) + +class SbbBinarizeProcessor(Processor): + + def __init__(self, *args, **kwargs): + kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] + kwargs['version'] = OCRD_TOOL['version'] + super().__init__(*args, **kwargs) + if hasattr(self, 'output_file_grp'): + # processing context + self.setup() + + def setup(self): + """ + Set up the model prior to processing. + """ + LOG = getLogger('processor.SbbBinarize.__init__') + if not 'model' in self.parameter: + raise ValueError("'model' parameter is required") + # resolve relative path via environment variable + model_path = Path(self.parameter['model']) + if not model_path.is_absolute(): + if 'SBB_BINARIZE_DATA' in environ and environ['SBB_BINARIZE_DATA']: + LOG.info("Environment variable SBB_BINARIZE_DATA is set to '%s'" \ + " - prepending to model value '%s'. If you don't want this mechanism," \ + " unset the SBB_BINARIZE_DATA environment variable.", + environ['SBB_BINARIZE_DATA'], model_path) + model_path = Path(environ['SBB_BINARIZE_DATA']).joinpath(model_path) + model_path = model_path.resolve() + if not model_path.is_dir(): + raise FileNotFoundError("Does not exist or is not a directory: %s" % model_path) + # resolve relative path via OCR-D ResourceManager + model_path = self.resolve_resource(str(model_path)) + self.binarizer = SbbBinarizer(model_dir=model_path, logger=LOG) + + def process(self): + """ + Binarize images with sbb_binarization (based on selectional auto-encoders). + + For each page of the input file group, open and deserialize input PAGE-XML + and its respective images. Then iterate over the element hierarchy down to + the requested ``operation_level``. + + For each segment element, retrieve a raw (non-binarized) segment image + according to the layout annotation (from an existing ``AlternativeImage``, + or by cropping into the higher-level images, and deskewing when applicable). + + Pass the image to the binarizer (which runs in fixed-size windows/patches + across the image and stitches the results together). + + Serialize the resulting bilevel image as PNG file and add it to the output + file group (with file ID suffix ``.IMG-BIN``) along with the output PAGE-XML + (referencing it as new ``AlternativeImage`` for the segment element). + + Produce a new PAGE output file by serialising the resulting hierarchy. 
+ """ + LOG = getLogger('processor.SbbBinarize') + assert_file_grp_cardinality(self.input_file_grp, 1) + assert_file_grp_cardinality(self.output_file_grp, 1) + + oplevel = self.parameter['operation_level'] + + for n, input_file in enumerate(self.input_files): + file_id = make_file_id(input_file, self.output_file_grp) + page_id = input_file.pageId or input_file.ID + LOG.info("INPUT FILE %i / %s", n, page_id) + pcgts = page_from_file(self.workspace.download_file(input_file)) + self.add_metadata(pcgts) + pcgts.set_pcGtsId(file_id) + page = pcgts.get_Page() + page_image, page_xywh, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') + + if oplevel == 'page': + LOG.info("Binarizing on 'page' level in page '%s'", page_id) + bin_image = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True)) + # update METS (add the image file): + bin_image_path = self.workspace.save_image_file(bin_image, + file_id + '.IMG-BIN', + page_id=input_file.pageId, + file_grp=self.output_file_grp) + page.add_AlternativeImage(AlternativeImageType(filename=bin_image_path, comments='%s,binarized' % page_xywh['features'])) + + elif oplevel == 'region': + regions = page.get_AllRegions(['Text', 'Table'], depth=1) + if not regions: + LOG.warning("Page '%s' contains no text/table regions", page_id) + for region in regions: + region_image, region_xywh = self.workspace.image_from_segment(region, page_image, page_xywh, feature_filter='binarized') + region_image_bin = cv2pil(binarizer.run(image=pil2cv(region_image), use_patches=True)) + region_image_bin_path = self.workspace.save_image_file( + region_image_bin, + "%s_%s.IMG-BIN" % (file_id, region.id), + page_id=input_file.pageId, + file_grp=self.output_file_grp) + region.add_AlternativeImage( + AlternativeImageType(filename=region_image_bin_path, comments='%s,binarized' % region_xywh['features'])) + + elif oplevel == 'line': + region_line_tuples = [(r.id, r.get_TextLine()) for r in page.get_AllRegions(['Text'], depth=0)] + if not region_line_tuples: + LOG.warning("Page '%s' contains no text lines", page_id) + for region_id, line in region_line_tuples: + line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') + line_image_bin = cv2pil(binarizer.run(image=pil2cv(line_image), use_patches=True)) + line_image_bin_path = self.workspace.save_image_file( + line_image_bin, + "%s_%s_%s.IMG-BIN" % (file_id, region_id, line.id), + page_id=input_file.pageId, + file_grp=self.output_file_grp) + line.add_AlternativeImage( + AlternativeImageType(filename=line_image_bin_path, comments='%s,binarized' % line_xywh['features'])) + + self.workspace.add_file( + ID=file_id, + file_grp=self.output_file_grp, + pageId=input_file.pageId, + mimetype=MIMETYPE_PAGE, + local_filename=join(self.output_file_grp, file_id + '.xml'), + content=to_xml(pcgts)) + +@command() +@ocrd_cli_options +def cli(*args, **kwargs): + return ocrd_cli_wrap_processor(SbbBinarizeProcessor, *args, **kwargs) diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index 1bd190e..ed510c8 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -49,6 +49,8 @@ def process(self): 'curved_line': self.parameter['curved_line'], 'full_layout': self.parameter['full_layout'], 'allow_scaling': self.parameter['allow_scaling'], + 'light_version': self.parameter['light_version'], + 'textline_light': self.parameter['textline_light'], 'headers_off': self.parameter['headers_off'], 'tables': self.parameter['tables'], 'override_dpi': 

From 1ae77e61c854b03c7de29eaf99592186dd19fc74 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Mon, 11 Nov 2024 14:11:36 +0100
Subject: [PATCH 52/64] Update requirements.txt

---
 requirements.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index f01d319..f4ab5eb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,10 @@
 ocrd >= 2.23.3
 numpy <1.24.0
 scikit-learn >= 0.23.2
-tensorflow == 2.12.1
+tensorflow < 2.13
 imutils >= 0.5.3
 matplotlib
 setuptools >= 50
+transformers
+torch
+numba

From 22b0b07a733052390ac0b00822f6e662686fbcc7 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Mon, 11 Nov 2024 19:01:40 +0100
Subject: [PATCH 53/64] drop-capital and marginals extraction is updated
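For the light version, the marginals update below decides whether a candidate
region really belongs to the margin by testing its centroid against the
rotated marginal mask polygon with cv2.pointPolygonTest, instead of the
width/height heuristic kept for the non-light path. A toy illustration of the
OpenCV convention used there (the polygon and test points are made up; with
measureDist=False the call returns +1.0 inside, 0.0 on the edge, and -1.0
outside, and the patch treats -1 as "not a marginal"):

    import numpy as np
    import cv2

    # a square polygon in OpenCV contour format (N, 1, 2), int32
    poly = np.array([[[20, 20]], [[80, 20]], [[80, 80]], [[20, 80]]], dtype=np.int32)

    print(cv2.pointPolygonTest(poly, (50, 50), False))  #  1.0 -> centroid inside the mask
    print(cv2.pointPolygonTest(poly, (5, 5), False))    # -1.0 -> outside, moved back to main text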
"/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: @@ -3653,7 +3653,7 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model, text_regions_p) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 29f80b4..d7f9ccd 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -775,7 +775,7 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): return layout_no_patch -def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label): +def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label, text_regions_p): drop_only = (layout_in_patch[:, :, 0] == drop_capital_label) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -791,12 +791,26 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) + mask_of_drop_cpaital_in_early_layout = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])) - if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: + mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w] = text_regions_p[y : y + h, x : x + w] + + all_drop_capital_pixels_which_is_text_in_early_lo = np.sum( mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w]==1 ) + + mask_of_drop_cpaital_in_early_layout[y : y + h, x : x + w]=1 + all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1 ) + + percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels) + + + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.6 and percent_text_to_all_in_drop>=0.3: layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label else: - layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == 0] = drop_capital_label + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == 4] = drop_capital_label# images + #layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label return layout_in_patch diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index 984156f..a29e50d 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -2,8 
+2,6 @@ import cv2 from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d - - from .contour import find_new_features_of_contours, return_contours_of_interested_region from .resize import resize_image from .rotate import rotate_image @@ -123,62 +121,92 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve if max_point_of_right_marginal>=text_regions.shape[1]: max_point_of_right_marginal=text_regions.shape[1]-1 + if light_version: + text_regions_org = np.copy(text_regions) + text_regions[text_regions[:,:]==1]=4 + + pixel_img=4 + min_area_text=0.00001 + + polygon_mask_marginals_rotated = return_contours_of_interested_region(mask_marginals,1,min_area_text) + + polygon_mask_marginals_rotated = polygon_mask_marginals_rotated[0] - #plt.imshow(mask_marginals_rotated) - #plt.show() + polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) - text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4 + cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) - #plt.imshow(text_regions) - #plt.show() + text_regions[(text_regions[:,:]==4)]=1 - pixel_img=4 - min_area_text=0.00001 - polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) + marginlas_should_be_main_text=[] - cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) + x_min_marginals_left=[] + x_min_marginals_right=[] - text_regions[(text_regions[:,:]==4)]=1 + for i in range(len(cx_text_only)): + results = cv2.pointPolygonTest(polygon_mask_marginals_rotated, (cx_text_only[i], cy_text_only[i]), False) - marginlas_should_be_main_text=[] + if results == -1: + marginlas_should_be_main_text.append(polygons_of_marginals[i]) - x_min_marginals_left=[] - x_min_marginals_right=[] - for i in range(len(cx_text_only)): - x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) - y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) - if x_width_mar>16 and y_height_mar/x_width_mar<18: - marginlas_should_be_main_text.append(polygons_of_marginals[i]) - if x_min_text_only[i]<(mid_point-one_third_left): - x_min_marginals_left_new=x_min_text_only[i] - if len(x_min_marginals_left)==0: - x_min_marginals_left.append(x_min_marginals_left_new) - else: - x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new) - else: - x_min_marginals_right_new=x_min_text_only[i] - if len(x_min_marginals_right)==0: - x_min_marginals_right.append(x_min_marginals_right_new) + text_regions_org=cv2.fillPoly(text_regions_org, pts =marginlas_should_be_main_text, color=(4,4)) + text_regions = np.copy(text_regions_org) + + + else: + + text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4 + + pixel_img=4 + min_area_text=0.00001 + + polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) + + cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) + + text_regions[(text_regions[:,:]==4)]=1 + + marginlas_should_be_main_text=[] + + x_min_marginals_left=[] + x_min_marginals_right=[] + + for i in range(len(cx_text_only)): + x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) + y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) + + if 
x_width_mar>16 and y_height_mar/x_width_mar<18: + marginlas_should_be_main_text.append(polygons_of_marginals[i]) + if x_min_text_only[i]<(mid_point-one_third_left): + x_min_marginals_left_new=x_min_text_only[i] + if len(x_min_marginals_left)==0: + x_min_marginals_left.append(x_min_marginals_left_new) + else: + x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new) else: - x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new) + x_min_marginals_right_new=x_min_text_only[i] + if len(x_min_marginals_right)==0: + x_min_marginals_right.append(x_min_marginals_right_new) + else: + x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new) - if len(x_min_marginals_left)==0: - x_min_marginals_left=[0] - if len(x_min_marginals_right)==0: - x_min_marginals_right=[text_regions.shape[1]-1] + if len(x_min_marginals_left)==0: + x_min_marginals_left=[0] + if len(x_min_marginals_right)==0: + x_min_marginals_right=[text_regions.shape[1]-1] - text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) + text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) - #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 - #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - - - text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 - text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 + #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 + #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 + + + text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 + text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 ###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4 From f43c49c5086289b695322640693a2bf4f5cfa797 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 13 Nov 2024 11:53:56 +0100 Subject: [PATCH 54/64] textlines of drop capitals are connected to corresponding textline if possible otherwise they are inserted in corresponding textregion --- src/eynollah/eynollah.py | 2 +- src/eynollah/utils/drop_capitals.py | 89 ++++++++++++++++++++--------- src/eynollah/writer.py | 6 +- 3 files changed, 66 insertions(+), 31 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 2c3965a..d7e389d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5176,7 +5176,7 @@ def run(self): pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line, 
textline_light=self.textline_light) pixel_lines = 6 if not self.reading_order_machine_based: diff --git a/src/eynollah/utils/drop_capitals.py b/src/eynollah/utils/drop_capitals.py index e12028f..67547d3 100644 --- a/src/eynollah/utils/drop_capitals.py +++ b/src/eynollah/utils/drop_capitals.py @@ -4,6 +4,7 @@ find_new_features_of_contours, return_contours_of_image, return_parent_contours, + return_contours_of_interested_region, ) def adhere_drop_capital_region_into_corresponding_textline( @@ -17,6 +18,7 @@ def adhere_drop_capital_region_into_corresponding_textline( all_found_textline_polygons_h, kernel=None, curved_line=False, + textline_light=False, ): # print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape') # print(all_found_textline_polygons[3]) @@ -76,7 +78,7 @@ def adhere_drop_capital_region_into_corresponding_textline( # region_with_intersected_drop=region_with_intersected_drop/3 region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8) # print(np.unique(img_con_all_copy[:,:,0])) - if curved_line: + if curved_line or textline_light: if len(region_with_intersected_drop) > 1: sum_pixels_of_intersection = [] @@ -114,12 +116,17 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + + #plt.imshow(img_textlines) + #plt.show() + + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + #ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - # print(len(contours_combined),'len textlines mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -130,8 +137,13 @@ def adhere_drop_capital_region_into_corresponding_textline( # contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0] # contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - - all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + + if len(contours_combined)==1: + all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + elif len(contours_combined)==2: + all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + else: + pass except: # print('gordun1') @@ -167,14 +179,13 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = img_textlines.astype(np.uint8) - # plt.imshow(img_textlines) - # plt.show() - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + ##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + ##ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + ##contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - # 
print(len(contours_combined),'len textlines mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) contours_biggest = contours_combined[np.argmax(areas_cnt_text)] @@ -186,7 +197,12 @@ def adhere_drop_capital_region_into_corresponding_textline( # print(np.shape(contours_biggest),'contours_biggest') # print(np.shape(all_found_textline_polygons[int(region_final)][arg_min])) ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + if len(contours_combined)==1: + all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + elif len(contours_combined)==2: + all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + else: + pass except: pass @@ -215,10 +231,11 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + #ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(len(contours_combined),'len textlines mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) @@ -231,7 +248,12 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0] ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) - all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + if len(contours_combined)==1: + all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest + elif len(contours_combined)==2: + all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] ) + else: + pass # all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest except: @@ -320,10 +342,12 @@ def adhere_drop_capital_region_into_corresponding_textline( img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255)) img_textlines = img_textlines.astype(np.uint8) - imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + contours_combined = return_contours_of_interested_region(img_textlines, 255, 0) + + #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + #ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(len(contours_combined),'len textlines mixed') areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) @@ -336,8 +360,12 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0] 
                            contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
-
-                            all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
+                            if len(contours_combined)==1:
+                                all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
+                            elif len(contours_combined)==2:
+                                all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] )
+                            else:
+                                pass

                         except:
                             # print('gordun1')
@@ -375,10 +403,12 @@ def adhere_drop_capital_region_into_corresponding_textline(
                             img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))

                             img_textlines = img_textlines.astype(np.uint8)
-                            imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
-                            ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+                            contours_combined = return_contours_of_interested_region(img_textlines, 255, 0)
+
+                            #imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
+                            #ret, thresh = cv2.threshold(imgray, 0, 255, 0)

-                            contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+                            #contours_combined, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                             # print(len(contours_combined),'len textlines mixed')
                             areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
@@ -391,7 +421,12 @@
                             contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0]

                             contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
-                            all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
+                            if len(contours_combined)==1:
+                                all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
+                            elif len(contours_combined)==2:
+                                all_found_textline_polygons[int(region_final)].insert(arg_min, polygons_of_drop_capitals[i_drop] )
+                            else:
+                                pass
                             # all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest

                         except:
diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py
index 96441c6..496b3db 100644
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@@ -285,9 +285,9 @@ def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_t
                 dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital',
                                              Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))
                 page.add_TextRegion(dropcapital)
-                all_box_coord_drop = None
-                slopes_drop = None
-                self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
+                ###all_box_coord_drop = None
+                ###slopes_drop = None
+                ###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)

         for mm in range(len(found_polygons_text_region_img)):
             page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))

From ce5b6112960f67d7819b11a9b346da0d8f5fdb4d Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Thu, 14 Nov 2024 17:18:07 +0100
Subject: [PATCH 55/64] tests are passing - note that the new models still
 need to be uploaded
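The one-line change below points the test suite at the renamed CLI entry
point. A minimal smoke test in the same spirit - the CliRunner harness and
the --help invocation are illustrative and not part of this patch:

    from click.testing import CliRunner
    from eynollah.cli import layout

    result = CliRunner().invoke(layout, ['--help'])
    assert result.exit_code == 0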
---
 tests/test_run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_run.py b/tests/test_run.py
index 2596dad..cdb715a 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from ocrd_utils import pushd_popd
 from tests.base import CapturingTestCase as TestCase, main
-from eynollah.cli import main as eynollah_cli
+from eynollah.cli import layout as eynollah_cli

 testdir = Path(__file__).parent.resolve()

From 5fa8ca46a47be5349ad91ae0f4121cdf661bf466 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Thu, 14 Nov 2024 17:35:00 +0100
Subject: [PATCH 56/64] updating requirements

---
 requirements.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f4ab5eb..02450aa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,6 @@ tensorflow < 2.13
 imutils >= 0.5.3
 matplotlib
 setuptools >= 50
-transformers
-torch
-numba
+transformers <= 4.30.2
+torch <= 2.0.1
+numba <= 0.58.1

From d9f79c3404fb6372031625d357fed5727fa6ec51 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Mon, 18 Nov 2024 10:15:19 +0100
Subject: [PATCH 57/64] fixing IndexError in reading order detection
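The fix below can be summarized by this sketch (the helper name and
standalone form are illustrative; the patch computes the same distance
inline, with boxes given as [x_min, x_max, y_min, y_max]):

    import math
    import numpy as np

    def assign_region_to_box(cx, cy, boxes):
        # boxes: [x_min, x_max, y_min, y_max] per reading-order box
        for jj, box in enumerate(boxes):
            if box[0] <= cx < box[1] and box[2] <= cy < box[3]:
                return jj  # the region reference point falls inside this box
        # previously this case fell through, leaving the assignment list shorter
        # than the region list and raising an IndexError later; now the region is
        # attached to the box whose (x_max, y_min) corner is nearest to its center
        dists = [math.sqrt((cx - box[1]) ** 2 + (cy - box[2]) ** 2) for box in boxes]
        return int(np.argmin(dists))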
---
 src/eynollah/eynollah.py | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index d7e389d..4f9eaa6 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -2678,17 +2678,29 @@ def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_on
         try:
             arg_text_con = []
             for ii in range(len(cx_text_only)):
+                check_if_textregion_located_in_a_box = False
                 for jj in range(len(boxes)):
                     if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]:
                         arg_text_con.append(jj)
+                        check_if_textregion_located_in_a_box = True
                         break
+                if not check_if_textregion_located_in_a_box:
+                    dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))]
+                    ind_min = np.argmin(dists_tr_from_box)
+                    arg_text_con.append(ind_min)
             args_contours = np.array(range(len(arg_text_con)))
             arg_text_con_h = []
             for ii in range(len(cx_text_only_h)):
+                check_if_textregion_located_in_a_box = False
                 for jj in range(len(boxes)):
                     if (x_min_text_only_h[ii] + 80) >= boxes[jj][0] and (x_min_text_only_h[ii] + 80) < boxes[jj][1] and y_cor_x_min_main_h[ii] >= boxes[jj][2] and y_cor_x_min_main_h[ii] < boxes[jj][3]:
                         arg_text_con_h.append(jj)
+                        check_if_textregion_located_in_a_box = True
                         break
+                if not check_if_textregion_located_in_a_box:
+                    dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))]
+                    ind_min = np.argmin(dists_tr_from_box)
+                    arg_text_con_h.append(ind_min)
             args_contours_h = np.array(range(len(arg_text_con_h)))

             order_by_con_head = np.zeros(len(arg_text_con_h))
@@ -2742,15 +2754,22 @@
             order_text_new = []
             for iii in range(len(order_of_texts_tot)):
                 order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])
-            
+
         except Exception as why:
             self.logger.error(why)
             arg_text_con = []
             for ii in range(len(cx_text_only)):
+                check_if_textregion_located_in_a_box = False
                 for jj in range(len(boxes)):
                     if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]:  # this is valid if the center of the region identifies the box in which it is located
                         arg_text_con.append(jj)
+                        check_if_textregion_located_in_a_box = True
                         break
+
+                if not check_if_textregion_located_in_a_box:
+                    dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))]
+                    ind_min = np.argmin(dists_tr_from_box)
+                    arg_text_con.append(ind_min)
             args_contours = np.array(range(len(arg_text_con)))

             order_by_con_main = np.zeros(len(arg_text_con))

             arg_text_con_h = []
             for ii in range(len(cx_text_only_h)):
+                check_if_textregion_located_in_a_box = False
                 for jj in range(len(boxes)):
                     if cx_text_only_h[ii] >= boxes[jj][0] and cx_text_only_h[ii] < boxes[jj][1] and cy_text_only_h[ii] >= boxes[jj][2] and cy_text_only_h[ii] < boxes[jj][3]:  # this is valid if the center of the region identifies the box in which it is located
                         arg_text_con_h.append(jj)
+                        check_if_textregion_located_in_a_box = True
                         break
+                if not check_if_textregion_located_in_a_box:
+                    dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + (cy_text_only_h[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))]
+                    ind_min = np.argmin(dists_tr_from_box)
+                    arg_text_con_h.append(ind_min)
             args_contours_h = np.array(range(len(arg_text_con_h)))

             order_by_con_head = np.zeros(len(arg_text_con_h))
@@ -2814,6 +2839,7 @@
         order_text_new = []
         for iii in range(len(order_of_texts_tot)):
             order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0])
+
         return order_text_new, id_of_texts_tot

     def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
@@ -2823,10 +2849,16 @@ def do_order_of_regions_no_full_layout(self, contours
         try:
             arg_text_con = []
             for ii in range(len(cx_text_only)):
+                check_if_textregion_located_in_a_box = False
                 for jj in range(len(boxes)):
                     if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]:
                         arg_text_con.append(jj)
+                        check_if_textregion_located_in_a_box = True
                         break
+                if not check_if_textregion_located_in_a_box:
+                    dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))]
+                    ind_min = np.argmin(dists_tr_from_box)
+                    arg_text_con.append(ind_min)
             args_contours = np.array(range(len(arg_text_con)))

             order_by_con_main = np.zeros(len(arg_text_con))
@@ -2868,10 +2900,16 @@ def do_order_of_regions_no_full_layout(self, contours
             self.logger.error(why)
             arg_text_con = []
             for ii in range(len(cx_text_only)):
+                check_if_textregion_located_in_a_box = False
                 for jj in range(len(boxes)):
                     if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]:  # this is valid if the center of the region identifies the box in which it is located
                         arg_text_con.append(jj)
+                        check_if_textregion_located_in_a_box = True
                         break
+                if not check_if_textregion_located_in_a_box:
+                    dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + (cy_text_only[ii] - boxes[jj][2]) ** 2) for jj in range(len(boxes))]
+                    ind_min = np.argmin(dists_tr_from_box)
+                    arg_text_con.append(ind_min)
             args_contours = np.array(range(len(arg_text_con)))
order_by_con_main = np.zeros(len(arg_text_con)) From b622494f34f8366f21ded3f3c8b85b8519245fa7 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 21 Nov 2024 02:16:22 +0100 Subject: [PATCH 58/64] new table detection model is integrated --- src/eynollah/eynollah.py | 413 +++++++++++++++++++++++---------------- 1 file changed, 241 insertions(+), 172 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 4f9eaa6..f2426f8 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -264,9 +264,13 @@ def __init__( else: self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: - self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" + self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" - self.model_tables = dir_models + "/eynollah-tables_20210319" + if self.tables: + if self.light_version: + self.model_table_dir = dir_models + "/modelens_table_0t4_201124" + else: + self.model_table_dir = dir_models + "/eynollah-tables_20210319" self.models = {} @@ -290,6 +294,9 @@ def __init__( self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") + if self.tables: + self.model_table = self.our_load_model(self.model_table_dir) + self.ls_imgs = os.listdir(self.dir_in) @@ -325,9 +332,13 @@ def __init__( self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) - + if self.tables: + self.model_table = self.our_load_model(self.model_table_dir) + self.ls_imgs = os.listdir(self.dir_in) + + def _cache_images(self, image_filename=None, image_pil=None): ret = {} @@ -2326,8 +2337,23 @@ def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_lay ###img_bin = np.copy(prediction_bin) ###else: ###img_bin = np.copy(img_resized) - - img_bin = np.copy(img_resized) + if self.ocr and not self.input_binary: + if not self.dir_in: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + else: + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + prediction_bin=prediction_bin[:,:,0] + prediction_bin = (prediction_bin[:,:]==0)*1 + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + prediction_bin = prediction_bin.astype(np.uint16) + #img= np.copy(prediction_bin) + img_bin = np.copy(prediction_bin) + else: + img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) @@ -3175,91 +3201,101 @@ def get_tables_from_model(self, img, num_col_classifier): img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - model_region, session_region = self.start_new_session_and_model(self.model_tables) + + + if self.dir_in: + pass + else: + self.model_table, _ = self.start_new_session_and_model(self.model_table_dir) patches = False - if num_col_classifier < 4 and num_col_classifier > 2: - prediction_table = self.do_prediction(patches, img, model_region) - 
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), model_region) - pre_updown = cv2.flip(pre_updown, -1) - - prediction_table[:,:,0][pre_updown[:,:,0]==1]=1 + if self.light_version: + prediction_table = self.do_prediction_new_concept(patches, img, self.model_table) prediction_table = prediction_table.astype(np.int16) + return prediction_table[:,:,0] + else: + if num_col_classifier < 4 and num_col_classifier > 2: + prediction_table = self.do_prediction(patches, img, self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table) + pre_updown = cv2.flip(pre_updown, -1) + + prediction_table[:,:,0][pre_updown[:,:,0]==1]=1 + prediction_table = prediction_table.astype(np.int16) + + elif num_col_classifier ==2: + height_ext = 0#int( img.shape[0]/4. ) + h_start = int(height_ext/2.) + width_ext = int( img.shape[1]/8. ) + w_start = int(width_ext/2.) - elif num_col_classifier ==2: - height_ext = 0#int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/8. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext - - img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + height_new = img.shape[0]+height_ext + width_new = img.shape[1]+width_ext + + img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 + img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] - prediction_ext = self.do_prediction(patches, img_new, model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region) - pre_updown = cv2.flip(pre_updown, -1) - - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - - prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 - prediction_table = prediction_table.astype(np.int16) + prediction_ext = self.do_prediction(patches, img_new, self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) + pre_updown = cv2.flip(pre_updown, -1) + + prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + + prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 + prediction_table = prediction_table.astype(np.int16) - elif num_col_classifier ==1: - height_ext = 0# int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/4. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext + elif num_col_classifier ==1: + height_ext = 0# int( img.shape[0]/4. ) + h_start = int(height_ext/2.) + width_ext = int( img.shape[1]/4. ) + w_start = int(width_ext/2.) 
- img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + height_new = img.shape[0]+height_ext + width_new = img.shape[1]+width_ext + + img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 + img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] - prediction_ext = self.do_prediction(patches, img_new, model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region) - pre_updown = cv2.flip(pre_updown, -1) - - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - - prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 - prediction_table = prediction_table.astype(np.int16) + prediction_ext = self.do_prediction(patches, img_new, self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), self.model_table) + pre_updown = cv2.flip(pre_updown, -1) + + prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] + + prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 + prediction_table = prediction_table.astype(np.int16) - else: - prediction_table = np.zeros(img.shape) - img_w_half = int(img.shape[1]/2.) + else: + prediction_table = np.zeros(img.shape) + img_w_half = int(img.shape[1]/2.) - pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], model_region) - pre2 = self.do_prediction(patches, img[:,img_w_half:,:], model_region) - pre_full = self.do_prediction(patches, img[:,:,:], model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), model_region) - pre_updown = cv2.flip(pre_updown, -1) - - prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4) - prediction_table_full_erode = cv2.dilate(prediction_table_full_erode, KERNEL, iterations=4) - - prediction_table_full_updown_erode = cv2.erode(pre_updown[:,:,0], KERNEL, iterations=4) - prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, iterations=4) + pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], self.model_table) + pre2 = self.do_prediction(patches, img[:,img_w_half:,:], self.model_table) + pre_full = self.do_prediction(patches, img[:,:,:], self.model_table) + pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table) + pre_updown = cv2.flip(pre_updown, -1) + + prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4) + prediction_table_full_erode = cv2.dilate(prediction_table_full_erode, KERNEL, iterations=4) + + prediction_table_full_updown_erode = cv2.erode(pre_updown[:,:,0], KERNEL, iterations=4) + prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, iterations=4) - prediction_table[:,0:img_w_half,:] = pre1[:,:,:] - prediction_table[:,img_w_half:,:] = pre2[:,:,:] - - prediction_table[:,:,0][prediction_table_full_erode[:,:]==1]=1 - prediction_table[:,:,0][prediction_table_full_updown_erode[:,:]==1]=1 - prediction_table = prediction_table.astype(np.int16) + prediction_table[:,0:img_w_half,:] = pre1[:,:,:] + prediction_table[:,img_w_half:,:] = pre2[:,:,:] + + prediction_table[:,:,0][prediction_table_full_erode[:,:]==1]=1 + 
prediction_table[:,:,0][prediction_table_full_updown_erode[:,:]==1]=1 + prediction_table = prediction_table.astype(np.int16) + + #prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6) + #prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6) - #prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6) - #prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6) - - prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) - prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) - return prediction_table_erode.astype(np.int16) + prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) + prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) + return prediction_table_erode.astype(np.int16) def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') @@ -3500,49 +3536,62 @@ def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p #print(time.time()-t_0_box,'time box in 3.1') if self.tables: - text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - #print(time.time()-t_0_box,'time box in 3.2') - img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) - #print(time.time()-t_0_box,'time box in 3.3') + if self.light_version: + pass + else: + text_regions_p_tables = np.copy(text_regions_p) + text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) + #print(time.time()-t_0_box,'time box in 3.2') + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + #print(time.time()-t_0_box,'time box in 3.3') else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) if self.tables: - text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 - - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) - - img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) - img_revised_tab2_d_rotated 
= img_revised_tab2_d_rotated.astype(np.int8)
-                img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])
+                if self.light_version:
+                    pass
+                else:
+                    text_regions_p_tables = np.copy(text_regions_p_1_n)
+                    text_regions_p_tables = np.round(text_regions_p_tables)
+                    text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10
+
+                    pixel_line = 3
+                    img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line)
+                    img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction_n, 10, num_col_classifier)
+
+                    img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew)
+                    img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
+                    img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)
+                    img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])
         #print(time.time()-t_0_box,'time box in 4')
         self.logger.info("detecting boxes took %.1fs", time.time() - t1)
         if self.tables:
-            if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                img_revised_tab = np.copy(img_revised_tab2[:,:,0])
-                img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1
+            if self.light_version:
+                text_regions_p[:,:][table_prediction[:,:] == 1] = 10
+                img_revised_tab = text_regions_p[:,:]
             else:
-                img_revised_tab = np.copy(text_regions_p[:,:])
-                img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0
-                img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10
-
-            text_regions_p[:,:][text_regions_p[:,:]==10] = 0
-            text_regions_p[:,:][img_revised_tab[:,:]==10] = 10
+                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
+                    img_revised_tab = np.copy(img_revised_tab2[:,:,0])
+                    img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1
+                else:
+                    img_revised_tab = np.copy(text_regions_p[:,:])
+                    img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0
+                    img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10
+
+                text_regions_p[:,:][text_regions_p[:,:]==10] = 0
+                text_regions_p[:,:][img_revised_tab[:,:]==10] = 10
         else:
             img_revised_tab = text_regions_p[:,:]
         #img_revised_tab = text_regions_p[:, :]
-        polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2)
+        if self.light_version:
+            polygons_of_images = return_contours_of_interested_region(text_regions_p, 2)
+        else:
+            polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2)
 
         pixel_img = 4
         min_area_mar = 0.00001
@@ -3565,82 +3614,102 @@ def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, s
         self.logger.debug('enter run_boxes_full_layout')
         t_full0 = time.time()
         if self.tables:
-            if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
-
-                text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
-                textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
-                table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1])
-
-                regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
-                regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
-            else:
-                text_regions_p_1_n = None
-                textline_mask_tot_d = None
-                regions_without_separators_d = None
-
-            regions_without_separators = (text_regions_p[:,:] == 1)*1  #( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1  #self.return_regions_without_seperators_new(text_regions_p[:,:,0], img_only_regions)
-            regions_without_separators[table_prediction == 1] = 1
-
-            pixel_lines = 3
-            if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
-
-            if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
-
-            if num_col_classifier >= 3:
-                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                    regions_without_separators = regions_without_separators.astype(np.uint8)
-                    regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6)
-
-                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
-                    regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6)
-            else:
-                pass
-
-            if np.abs(slope_deskew) < SLOPE_THRESHOLD:
-                boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left)
-                text_regions_p_tables = np.copy(text_regions_p)
-                text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10
-                pixel_line = 3
-                img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line)
-
-                img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier)
-            else:
-                boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)
-                text_regions_p_tables = np.copy(text_regions_p_1_n)
-                text_regions_p_tables = np.round(text_regions_p_tables)
-                text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10
-
-                pixel_line = 3
-                img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line)
-
-                img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction_n, 10, num_col_classifier)
-                img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew)
-
-                img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
-                img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)
-
-                img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])
-
-            if np.abs(slope_deskew) < 0.13:
-                img_revised_tab = np.copy(img_revised_tab2[:,:,0])
-            else:
-                img_revised_tab = np.copy(text_regions_p[:,:])
-                img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0
-                img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10
-
-            ##img_revised_tab=img_revised_tab2[:,:,0]
-            #img_revised_tab=text_regions_p[:,:]
-            text_regions_p[:,:][text_regions_p[:,:]==10] = 0
-            text_regions_p[:,:][img_revised_tab[:,:]==10] = 10
-            #img_revised_tab[img_revised_tab2[:,:,0]==10] =10
+            if self.light_version:
+                text_regions_p[:,:][table_prediction[:,:] == 1] = 10
+                img_revised_tab = text_regions_p[:,:]
+                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
+                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
+
+                    text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
+                    textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
+                    table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1])
+
+                    regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
+                    regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
+                else:
+                    text_regions_p_1_n = None
+                    textline_mask_tot_d = None
+                    regions_without_separators_d = None
+                regions_without_separators = (text_regions_p[:,:] == 1)*1  #( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1  #self.return_regions_without_seperators_new(text_regions_p[:,:,0], img_only_regions)
+                regions_without_separators[table_prediction == 1] = 1
+            else:
+                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
+                    image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew)
+
+                    text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
+                    textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
+                    table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1])
+
+                    regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
+                    regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
+                else:
+                    text_regions_p_1_n = None
+                    textline_mask_tot_d = None
+                    regions_without_separators_d = None
+
+                regions_without_separators = (text_regions_p[:,:] == 1)*1  #( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1  #self.return_regions_without_seperators_new(text_regions_p[:,:,0], img_only_regions)
+                regions_without_separators[table_prediction == 1] = 1
+
+                pixel_lines = 3
+                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
+                    num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
+
+                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
+                    num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
+
+                if num_col_classifier >= 3:
+                    if np.abs(slope_deskew) < SLOPE_THRESHOLD:
+                        regions_without_separators = regions_without_separators.astype(np.uint8)
+                        regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6)
+
+                    if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
+                        regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
+                        regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6)
+                else:
+                    pass
+
+                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
+                    boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left)
+                    text_regions_p_tables = np.copy(text_regions_p)
+                    text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10
+                    pixel_line = 3
+                    img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line)
+
+                    img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier)
+                else:
+                    boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)
+                    text_regions_p_tables = np.copy(text_regions_p_1_n)
+                    text_regions_p_tables = np.round(text_regions_p_tables)
+                    text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10
+
+                    pixel_line = 3
+                    img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes_d, 0, splitter_y_new_d, peaks_neg_tot_tables_d, text_regions_p_tables, num_col_classifier, 0.000005, pixel_line)
+
+                    img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction_n, 10, num_col_classifier)
+                    img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew)
+
+                    img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated)
+                    img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8)
+
+                    img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1])
+
+                if np.abs(slope_deskew) < 0.13:
+                    img_revised_tab = np.copy(img_revised_tab2[:,:,0])
+                else:
+                    img_revised_tab = np.copy(text_regions_p[:,:])
+                    img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0
+                    img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10
+
+                ##img_revised_tab=img_revised_tab2[:,:,0]
+                #img_revised_tab=text_regions_p[:,:]
+                text_regions_p[:,:][text_regions_p[:,:]==10] = 0
+                text_regions_p[:,:][img_revised_tab[:,:]==10] = 10
+                #img_revised_tab[img_revised_tab2[:,:,0]==10] =10
 
         pixel_img = 4
         min_area_mar = 0.00001
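[Editor's note] Both run_boxes_* hunks above repeat one pattern: when |slope_deskew| >= SLOPE_THRESHOLD, table detection runs on a deskewed copy of the region predictions, and the resulting mask is rotated back by -slope_deskew and resized to the original page shape before being merged in as label 10. The sketch below is a minimal, self-contained illustration of that round trip, not code from the patch: detect_tables is a hypothetical stand-in for add_tables_heuristic_to_layout, and a degree-valued slope is assumed.

    import cv2
    import numpy as np
    from scipy.ndimage import rotate

    SLOPE_THRESHOLD = 0.13
    TABLE_LABEL = 10

    def merge_tables_deskew_aware(regions, slope_deskew, detect_tables):
        # Skew below threshold: detect directly on the original frame.
        if abs(slope_deskew) < SLOPE_THRESHOLD:
            return detect_tables(regions)
        # Detect in the deskewed frame, where table rows/columns are axis-aligned.
        regions_d = rotate(regions, slope_deskew, reshape=True, order=0)
        tables_d = detect_tables(regions_d)
        # Rotate the table mask back and resize to the original page shape.
        tables = rotate(tables_d, -slope_deskew, reshape=True, order=0)
        tables = cv2.resize(tables.astype(np.uint8),
                            (regions.shape[1], regions.shape[0]),
                            interpolation=cv2.INTER_NEAREST)
        # Merge tables into the layout under the dedicated table label.
        merged = np.copy(regions)
        merged[tables == TABLE_LABEL] = TABLE_LABEL
        return merged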
From 1746920275a759e06efa5a862dc39b898e4db75c Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Thu, 21 Nov 2024 12:08:29 +0100
Subject: [PATCH 59/64] Update Makefile

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index a3b7b95..454c75e 100644
--- a/Makefile
+++ b/Makefile
@@ -32,9 +32,9 @@ models_eynollah: models_eynollah.tar.gz
 models_eynollah.tar.gz:
 	# wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz'
 	# wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz'
-	# wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz'
+	wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz'
 	# wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz'
-	wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz'
+	# wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz'
 
 # Install with pip
 install:

From 3000255a243105ed82ae5059117c43d6bc93f31d Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Fri, 22 Nov 2024 12:40:21 +0100
Subject: [PATCH 60/64] Update Makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 454c75e..6089f6e 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ models_eynollah: models_eynollah.tar.gz
 models_eynollah.tar.gz:
 	# wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz'
 	# wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz'
-	wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz'
+	wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz'
 	# wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz'
 	# wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz'

From 8014a9e416dd4bf80f4047d55644844d4d75293a Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Fri, 22 Nov 2024 19:47:06 +0100
Subject: [PATCH 61/64] Update Makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6089f6e..506fcf7 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@ install-dev:
 	pip install -e .
 
 smoke-test:
-	eynollah -i tests/resources/kant_aufklaerung_1784_0020.tif -o . -m $(PWD)/models_eynollah
+	eynollah layout -i tests/resources/kant_aufklaerung_1784_0020.tif -o . -m $(PWD)/models_eynollah
 
 # Run unit tests
 test:

From 1083d1c7fb48d9182e6f635b6815dbc34b145e24 Mon Sep 17 00:00:00 2001
From: kba
Date: Mon, 25 Nov 2024 19:32:42 +0100
Subject: [PATCH 62/64] gha: try to free disk space

---
 .github/workflows/test-eynollah.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml
index 3a33dcf..8a6941f 100644
--- a/.github/workflows/test-eynollah.yml
+++ b/.github/workflows/test-eynollah.yml
@@ -14,6 +14,12 @@ jobs:
         python-version: ['3.8', '3.9', '3.10', '3.11']
 
     steps:
+      - name: clean up
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
       - uses: actions/checkout@v4
       - uses: actions/cache@v4
         id: model_cache
From 6aad006f4c556b33a1d23d83c20fe2ca112448bc Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Mon, 2 Dec 2024 12:43:57 +0100
Subject: [PATCH 63/64] filter textregions without textline

---
 src/eynollah/eynollah.py | 45 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index f2426f8..c28c441 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -4772,8 +4772,45 @@ def filter_contours_inside_a_bigger_one(self,contours, image, marginal_cnts=None
 
 
-
+    def filter_contours_without_textline_inside(self,contours,text_con_org, contours_textline):
+        ###contours_txtline_of_all_textregions = []
+        ###for jj in range(len(contours_textline)):
+            ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj]
+
+        ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))]
+        ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) for j in range(len(M_main_textline))]
+        ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) for j in range(len(M_main_textline))]
+
+        ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))]
+        ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
+        ###cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
+
+        ###contours_with_textline = []
+        ###for ind_tr, con_tr in enumerate(contours):
+            ###results = [cv2.pointPolygonTest(con_tr, (cx_main_textline[index_textline_con], cy_main_textline[index_textline_con]), False) for index_textline_con in range(len(contours_txtline_of_all_textregions)) ]
+            ###results = np.array(results)
+            ###if np.any(results==1):
+                ###contours_with_textline.append(con_tr)
+
+        textregion_index_to_del = []
+        for index_textregion, textlines_textregion in enumerate(contours_textline):
+            if len(textlines_textregion)==0:
+                textregion_index_to_del.append(index_textregion)
+
+        uniqe_args_trs = np.unique(textregion_index_to_del)
+        uniqe_args_trs_sorted = np.sort(uniqe_args_trs)[::-1]
+
+        for ind_u_a_trs in uniqe_args_trs_sorted:
+            contours.pop(ind_u_a_trs)
+            contours_textline.pop(ind_u_a_trs)
+            text_con_org.pop(ind_u_a_trs)
+
+        return contours, text_con_org, contours_textline
 
     def dilate_textlines(self,all_found_textline_polygons):
         for j in range(len(all_found_textline_polygons)):
@@ -5239,6 +5276,8 @@ def run(self):
                     all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
                     all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals)
 
+                    contours_only_text_parent, txt_con_org, all_found_textline_polygons = self.filter_contours_without_textline_inside(contours_only_text_parent, txt_con_org, all_found_textline_polygons)
+
                 else:
                     textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
                     slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
@@ -5395,17 +5434,17 @@ def run(self):
                             if self.textline_light:
                                 mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1)
-
                             img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255
                             img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255
                             img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255
 
                             img_croped = img_poly_on_img[y:y+h, x:x+w, :]
+                            #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
                             text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot)
 
                             ocr_textline_in_textregion.append(text_ocr)
 
-                            ##cv2.imwrite(str(ind_tot)+'.png', img_croped)
+
                             ind_tot = ind_tot + 1
                         ocr_all_textlines.append(ocr_textline_in_textregion)
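[Editor's note] The kept code path in the patch above drops every text region whose textline list is empty, popping three parallel lists (contours, text_con_org, contours_textline) at the same indices. The pops must run in descending index order: popping in ascending order would shift the remaining indices and delete the wrong regions. A toy illustration of that pitfall, with illustrative names not taken from the patch:

    # Indices of regions with no textlines are deleted from the end backwards
    # so that earlier indices stay valid while popping.
    regions   = ["r0", "r1", "r2", "r3"]
    textlines = [["l0"], [], ["l2"], []]   # r1 and r3 have no textlines

    to_del = [i for i, tl in enumerate(textlines) if len(tl) == 0]  # [1, 3]
    for i in sorted(to_del, reverse=True):  # pop index 3 first, then 1
        regions.pop(i)
        textlines.pop(i)

    print(regions)    # ['r0', 'r2']
    print(textlines)  # [['l0'], ['l2']]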
From 871d7bfc5a76d8a81b4aec0b4b7c701eeeb883f9 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 4 Dec 2024 16:41:00 +0100
Subject: [PATCH 64/64] fixed: machine-based reading order caused a "tuple
 index out of range" error if the number of text regions is one

---
 src/eynollah/eynollah.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index c28c441..e802e29 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -4013,7 +4013,10 @@ def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text
         inference_bs = 3
         input_1 = np.zeros( (inference_bs, height1, width1, 3) )
         starting_list_of_regions = []
-        starting_list_of_regions.append( list(range(labels_con.shape[2])) )
+        if len(co_text_all) <= 1:
+            starting_list_of_regions.append( list(range(1)) )
+        else:
+            starting_list_of_regions.append( list(range(labels_con.shape[2])) )
         index_update = 0
         index_selected = starting_list_of_regions[0]
         #print(labels_con.shape[2],"number of regions for reading order")
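[Editor's note] The guard avoids indexing axis 2 of labels_con when only one region exists. A plausible root cause — an assumption on the editor's part, since the commit message only reports the symptom — is that resizing a single-region label stack with OpenCV silently drops the trailing singleton axis, after which labels_con.shape[2] raises the reported IndexError:

    import cv2
    import numpy as np

    labels_con = np.zeros((100, 80, 1), dtype='uint8')  # stack holding one region
    resized = cv2.resize(labels_con, (448, 672))        # dsize is (width, height)
    print(resized.shape)   # (672, 448) -- the singleton channel axis is gone
    # resized.shape[2]     # would raise IndexError: tuple index out of range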