From edf53032663b2f91b8e0aa71543a823eb1dd62fa Mon Sep 17 00:00:00 2001 From: Marina Date: Thu, 29 Jun 2023 14:41:47 +0300 Subject: [PATCH 01/28] first commit of theme --- app/main/check_packs/pack_config.py | 6 +- app/main/checks/__init__.py | 7 +- .../checks/presentation_checks/__init__.py | 1 + .../presentation_checks/find_def_sld.py | 9 +- .../presentation_checks/find_theme_in_pres.py | 57 +++++++ app/main/checks/report_checks/__init__.py | 1 + .../report_checks/find_theme_in_report.py | 150 ++++++++++++++++++ 7 files changed, 223 insertions(+), 8 deletions(-) create mode 100644 app/main/checks/presentation_checks/find_theme_in_pres.py create mode 100644 app/main/checks/report_checks/find_theme_in_report.py diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index cf0b85b2..e5f0c93b 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -14,7 +14,8 @@ ['pres_right_words'], ['pres_image_share'], ['future_dev'], - ['pres_banned_words_check'] + ['pres_banned_words_check'], + ['theme_in_pres_check'], ] BASE_REPORT_CRITERION = [ ["simple_check"], @@ -37,7 +38,8 @@ ["needed_headers_check"], ["header_check"], ["report_section_component"], - ["main_text_check"] + ["main_text_check"], + ["theme_in_report_check"], ] DEFAULT_TYPE = 'pres' diff --git a/app/main/checks/__init__.py b/app/main/checks/__init__.py index 8f65c507..7b7acb68 100644 --- a/app/main/checks/__init__.py +++ b/app/main/checks/__init__.py @@ -17,7 +17,8 @@ PresRightWordsCheck.id: PresRightWordsCheck, PresImageShareCheck.id: PresImageShareCheck, FurtherDev.id: FurtherDev, - PresBannedWordsCheck.id: PresBannedWordsCheck + PresBannedWordsCheck.id: PresBannedWordsCheck, + FindThemeInPres.id: FindThemeInPres, }, 'report': { ReportSimpleCheck.id: ReportSimpleCheck, @@ -38,6 +39,8 @@ ReportNeededHeadersCheck.id: ReportNeededHeadersCheck, ReportChapters.id: ReportChapters, ReportSectionComponent.id: ReportSectionComponent, - ReportMainTextCheck.id: ReportMainTextCheck + ReportMainTextCheck.id: ReportMainTextCheck, + FindThemeInReport.id: FindThemeInReport, + } } diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py index 81ec9cbd..ca032913 100644 --- a/app/main/checks/presentation_checks/__init__.py +++ b/app/main/checks/presentation_checks/__init__.py @@ -10,3 +10,4 @@ from .pres_right_words import PresRightWordsCheck from .image_share import PresImageShareCheck from .banned_words import PresBannedWordsCheck +from .find_theme_in_pres import FindThemeInPres diff --git a/app/main/checks/presentation_checks/find_def_sld.py b/app/main/checks/presentation_checks/find_def_sld.py index 46c64b0f..33a66ff5 100644 --- a/app/main/checks/presentation_checks/find_def_sld.py +++ b/app/main/checks/presentation_checks/find_def_sld.py @@ -8,18 +8,19 @@ class FindDefSld(BasePresCriterion): def __init__(self, file_info, key_slide): super().__init__(file_info) self.type_of_slide = key_slide + self.found_idxs = [] def check(self): - found_slides, found_idxs = [], [] + found_slides = [] for i, title in enumerate(self.file.get_titles(), 1): if str(title).lower().find(str(self.type_of_slide).lower()) != -1: found_slides.append(self.file.get_text_from_slides()[i - 1]) - found_idxs.append(i) + self.found_idxs.append(i) if len(found_slides) == 0: return answer(False, 'Слайд не найден') else: - found_idxs = self.format_page_link(found_idxs) - return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs)))) + found_idxs_link = self.format_page_link(self.found_idxs) + return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs_link)))) @property def name(self): diff --git a/app/main/checks/presentation_checks/find_theme_in_pres.py b/app/main/checks/presentation_checks/find_theme_in_pres.py new file mode 100644 index 00000000..c55f64b9 --- /dev/null +++ b/app/main/checks/presentation_checks/find_theme_in_pres.py @@ -0,0 +1,57 @@ + +from ..base_check import BasePresCriterion, answer +from .find_def_sld import FindDefSld +from app.nlp.stemming import Stemming + +import string +import nltk +from nltk.tokenize import word_tokenize, sent_tokenize +from nltk.corpus import stopwords +from pymorphy2 import MorphAnalyzer + +nltk.download('stopwords') +MORPH_ANALYZER = MorphAnalyzer() + + +class FindThemeInPres(BasePresCriterion): + + description = "Проверка упоминания темы в презентации" + id = 'theme_in_pres_check' + + def __init__(self, file_info): + super().__init__(file_info) + self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение") + + def check(self): + + stop_words = set(stopwords.words("russian")) + + self.check_conclusion.check() + page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs"))) + + text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion] + theme = ''.join(word for word in text_from_title[0]) + + translator = str.maketrans('', '', string.punctuation) + theme_without_punct = theme.translate(translator) + words_in_theme = word_tokenize(theme_without_punct) + # for word in words_in_theme: + lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words} + + + text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1] + string_from_text = ''.join(text_from_slide) + + text_without_punct = string_from_text.translate(translator) + words_in_text = word_tokenize(text_without_punct) + + lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words} + + intersection = round(len(lemma_theme.intersection(lemma_text))//len(lemma_theme))*100 + + if intersection == 0: + return answer(False, f"Не пройдена! {intersection}") + elif 1 < intersection < 40: + return answer(False, f"Обратите внимание! {intersection} %") + else: + return answer (True, f'Пройдена! {intersection} %') diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py index a2fa3819..a8ef3c6b 100644 --- a/app/main/checks/report_checks/__init__.py +++ b/app/main/checks/report_checks/__init__.py @@ -17,4 +17,5 @@ from .short_sections_check import ReportShortSectionsCheck from .simple_check import ReportSimpleCheck from .style_check_settings import StyleCheckSettings +from .find_theme_in_report import FindThemeInReport diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py new file mode 100644 index 00000000..3002704c --- /dev/null +++ b/app/main/checks/report_checks/find_theme_in_report.py @@ -0,0 +1,150 @@ +import re +import string + +from ..base_check import BaseReportCriterion, answer +# from .find_def_sld import FindDefSld +# from app.nlp.stemming import Stemming +from ...reports.pdf_document.pdf_document_manager import PdfDocumentManager +import pdfplumber +from ...reports.docx_uploader import DocxUploader + +import string +import nltk +from nltk.tokenize import word_tokenize, sent_tokenize +from nltk.corpus import stopwords +from pymorphy2 import MorphAnalyzer + +nltk.download('stopwords') +MORPH_ANALYZER = MorphAnalyzer() + + +class FindThemeInReport(BaseReportCriterion): + + description = "Проверка упоминания темы в отчете" + id = 'theme_in_report_check' + + def __init__(self, file_info): + super().__init__(file_info) + self.intro = {} + self.chapters = [] + self.text_par = [] + self.full_text = set() + + def late_init(self): + self.chapters = self.file.make_chapters(self.file_type['report_type']) + + def check(self): + stop_words = set(stopwords.words("russian")) + if self.file.page_counter() < 4: + return answer(False, "В отчете недостаточно страниц. Нечего проверять.") + + self.late_init() + for intro in self.chapters: + header = intro["text"].lower() + if header not in ['заключение', "введение", "список использованных источников", "условные обозначения"]: + self.intro = intro + for intro_par in self.intro['child']: + par = intro_par['text'].lower() + self.text_par.append(par) + lemma_theme = self.find_theme() + for i in self.text_par: + translator = str.maketrans('', '', string.punctuation) + theme_without_punct = i.translate(translator) + word_in_text = word_tokenize(theme_without_punct) + lemma_text = {MORPH_ANALYZER.parse(w)[0].normal_form for w in word_in_text if w.lower() not in stop_words} + self.full_text.update(lemma_text) + + intersection = lemma_theme.intersection(self.full_text) + int_pr = round(len(intersection)*100//len(lemma_theme)) + + return answer(True, f'{lemma_theme} {intersection} hhh {int_pr}') + + + + + + + def find_theme(self): + stop_words = set(stopwords.words("russian")) + lemma_theme = [] + for key, text_on_page in self.file.pdf_file.get_text_on_page().items(): + if key == 1: + lower_text = text_on_page.lower() + text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation)) + list_full = text_without_punct.split() + start = list_full.index('тема') + 1 + end = list_full.index('студент') + list_theme = list_full[start:end] + lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if + word not in stop_words} + return lemma_theme + + + + + + # full_text_pre = self.file.pdf_file.text_on_page + # full_text = ''.jo + # start_text = full_text.index['1.'] + # end_text = full_text.index['ЗАКЛЮЧЕНИЕ'] + # text_for_analys = full_text[start_text:end_text] + # lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in text_for_analys if word not in stop_words} + + # for text_on_page in self.file.pdf_file.get_text_on_page().values(): + # + # lower_text = text_on_page.lower() + # text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation)) + # list_full = text_without_punct.split() + # start = list_full.index('тема') + # end = list_full.index('студент') + # list_theme = list_full[start:end] + # lemma_theme = ({MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if + # word not in stop_words}) + + + + + + +# class FindThemeInReport(BaseReportCriterion): +# +# description = "Проверка упоминания темы в отчете" +# id = 'theme_in_report_check' +# +# def __init__(self, file_info): +# super().__init__(file_info) +# self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение") +# +# def check(self): +# +# stop_words = set(stopwords.words("russian")) +# +# self.check_conclusion.check() +# page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs"))) +# +# text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion] +# theme = ''.join(word for word in text_from_title[0]) +# +# translator = str.maketrans('', '', string.punctuation) +# theme_without_punct = theme.translate(translator) +# words_in_theme = word_tokenize(theme_without_punct) +# # for word in words_in_theme: +# lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words} +# +# +# text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1] +# string_from_text = ''.join(text_from_slide) +# +# text_without_punct = string_from_text.translate(translator) +# words_in_text = word_tokenize(text_without_punct) +# +# lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words} +# +# intersection = round(len(lemma_theme.intersection(lemma_text))//len(lemma_theme))*100 +# +# if intersection == 0: +# return answer(False, f"Не пройдена! {intersection}") +# elif 1 < intersection < 40: +# return answer(False, f"Обратите внимание! {intersection} %") +# else: +# return answer (True, f'Пройдена! {intersection} %') From d9d6e6ae050ab78d3954adfbd782045a453283e5 Mon Sep 17 00:00:00 2001 From: Marina Date: Tue, 4 Jul 2023 12:16:14 +0300 Subject: [PATCH 02/28] fix image share check --- .../checks/report_checks/image_share_check.py | 47 +++++++++---------- .../pdf_document/pdf_document_manager.py | 27 ++++++++++- requirements.txt | 13 +++-- 3 files changed, 58 insertions(+), 29 deletions(-) diff --git a/app/main/checks/report_checks/image_share_check.py b/app/main/checks/report_checks/image_share_check.py index 1883a7a4..7929816f 100644 --- a/app/main/checks/report_checks/image_share_check.py +++ b/app/main/checks/report_checks/image_share_check.py @@ -1,6 +1,5 @@ from ..base_check import BaseReportCriterion, answer - class ReportImageShareCheck(BaseReportCriterion): description = "Проверка доли объема отчёта, приходящейся на изображения" id = 'image_share_check' @@ -12,27 +11,25 @@ def __init__(self, file_info, limit=0.3): def check(self): if self.file.page_counter() < 4: return answer(False, "В отчете недостаточно страниц. Нечего проверять.") - images_height = 0 - for image in self.file.inline_shapes: - images_height += image.height.cm - if len(self.file.file.sections): - available_space = self.file.file.sections[0].page_height.cm - self.file.file.sections[0].bottom_margin.cm - \ - self.file.file.sections[0].top_margin.cm - images_pages = images_height / available_space - share = images_pages / self.file.count - if share > self.limit: - result_str = f'Проверка не пройдена! Изображения в работе занимают около {round(share, 2)} объема ' \ - f'документа без учета приложения, ограничение - {round(self.limit, 2)}' - result_str += ''' - Если доля отчета, приходящаяся на изображения, больше нормы, попробуйте сделать следующее: - - ''' - return answer(False, result_str) - else: - return answer(True, f'Пройдена!') - return answer(False, 'Во время обработки произошла критическая ошибка') + images_height = self.file.pdf_file.page_images() + available_space = self.file.pdf_file.page_height() + + images_value = images_height/available_space + + if images_value > self.limit: + result_str = f'Проверка не пройдена! Изображения в работе занимают около {round(images_value, 2)} объема ' \ + f'документа без учета приложения, ограничение - {round(self.limit, 2)}' + result_str += ''' + Если доля отчета, приходящаяся на изображения, больше нормы, попробуйте сделать следующее: + + ''' + return answer(False, result_str) + else: + return answer(True, f'Пройдена!') + + # return answer(False, f'Во время обработки произошла критическая ошибка') diff --git a/app/main/reports/pdf_document/pdf_document_manager.py b/app/main/reports/pdf_document/pdf_document_manager.py index ddc125e0..d5679653 100644 --- a/app/main/reports/pdf_document/pdf_document_manager.py +++ b/app/main/reports/pdf_document/pdf_document_manager.py @@ -1,14 +1,18 @@ + import pdfplumber +import fitz -from app.utils import convert_to +from app.utils import convert_to class PdfDocumentManager: def __init__(self, path_to_file, pdf_filepath=''): if not pdf_filepath: self.pdf_file = pdfplumber.open(convert_to(path_to_file, target_format='pdf')) + self.pdf_fitz = fitz.open(convert_to(path_to_file, target_format='pdf')) else: self.pdf_file = pdfplumber.open(pdf_filepath) + self.pdf_fitz = fitz.open(pdf_filepath) self.pages = self.pdf_file.pages self.page_count = len(self.pages) self.text_on_page = self.get_text_on_page() @@ -18,6 +22,27 @@ def __init__(self, path_to_file, pdf_filepath=''): def get_text_on_page(self): return {page + 1: self.pages[page].extract_text() for page in range(self.page_count)} + def page_images(self): + total_height = 0 + for page_num in range(self.page_count): + page = self.pdf_fitz[page_num] + images = self.pdf_fitz.get_page_images(page) + for image in images: + image_coord = page.get_image_bbox(image[7], transform=0) + total_height += (image_coord[3] - image_coord[1]) + + return total_height + + def page_height(self): + page = self.pdf_fitz[0] # get first page as a sample + page_rect = page.rect + height = page_rect.height + top_margin = page_rect.y0 + bottom_margin = height - page_rect.y1 + available_space = (height - top_margin - bottom_margin)*self.page_count + + return available_space + # def get_only_text_on_page(self): # if not self.only_text_on_page: # only_text_on_page = {} diff --git a/requirements.txt b/requirements.txt index 9acf11fc..afe3a243 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ werkzeug==2.0.0 Flask==2.0.3 jinja2==3.0.0 -requests==2.24.0 +requests~=2.31.0 python-pptx==0.6.18 odfpy==1.4.1 pymongo==3.11.1 flask-login==0.5.0 numpy==1.22 -scipy==1.7.1 +scipy~=1.10.1 pymorphy2==0.9.1 nltk==3.6.6 flask-recaptcha==0.4.2 @@ -16,7 +16,7 @@ flask-security==3.0.0 celery==5.2.2 flower==1.2.0 redis==3.5.3 -pandas==1.3.4 +pandas~=2.0.3 fsspec==2022.2.0 python-docx==0.8.11 odfpy==1.4.1 @@ -25,3 +25,10 @@ docx2python~=2.0.4 oauthlib~=3.1.0 pdfplumber==0.6.1 pytest~=7.1.2 + +PyMuPDF~=1.22.5 +PyPDF2~=3.0.1 + +configparser~=5.3.0 +pytz~=2023.3 +urllib3~=2.0.3 \ No newline at end of file From b837d60431473df97267cf9c6450630f6f86c6ba Mon Sep 17 00:00:00 2001 From: Marina Date: Tue, 4 Jul 2023 18:35:39 +0300 Subject: [PATCH 03/28] theme in text light check --- .../presentation_checks/find_theme_in_pres.py | 19 ++-- .../report_checks/find_theme_in_report.py | 100 +++--------------- 2 files changed, 23 insertions(+), 96 deletions(-) diff --git a/app/main/checks/presentation_checks/find_theme_in_pres.py b/app/main/checks/presentation_checks/find_theme_in_pres.py index c55f64b9..a0dd583c 100644 --- a/app/main/checks/presentation_checks/find_theme_in_pres.py +++ b/app/main/checks/presentation_checks/find_theme_in_pres.py @@ -3,7 +3,7 @@ from .find_def_sld import FindDefSld from app.nlp.stemming import Stemming -import string +import string import nltk from nltk.tokenize import word_tokenize, sent_tokenize from nltk.corpus import stopwords @@ -18,9 +18,10 @@ class FindThemeInPres(BasePresCriterion): description = "Проверка упоминания темы в презентации" id = 'theme_in_pres_check' - def __init__(self, file_info): + def __init__(self, file_info, limit = 40): super().__init__(file_info) self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение") + self.limit = limit def check(self): @@ -35,7 +36,6 @@ def check(self): translator = str.maketrans('', '', string.punctuation) theme_without_punct = theme.translate(translator) words_in_theme = word_tokenize(theme_without_punct) - # for word in words_in_theme: lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words} @@ -47,11 +47,12 @@ def check(self): lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words} - intersection = round(len(lemma_theme.intersection(lemma_text))//len(lemma_theme))*100 + value_intersection = round(len(lemma_theme.intersection(lemma_text))*100//len(lemma_theme)) - if intersection == 0: - return answer(False, f"Не пройдена! {intersection}") - elif 1 < intersection < 40: - return answer(False, f"Обратите внимание! {intersection} %") + if value_intersection == 0: + return answer(False, f"Не пройдена! В презентации не упоминаются слова, завяленные в теме.") + elif 1 < value_intersection < self.limit: + return answer(False, + f"Не пройдена! Процент упоминания темы в вашей презентации ({value_intersection} %) ниже требуемого ({self.limit} %).") else: - return answer (True, f'Пройдена! {intersection} %') + return answer(True, f'Пройдена! Процент упоминания темы в презентации: {value_intersection} %') diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py index 3002704c..e19ba449 100644 --- a/app/main/checks/report_checks/find_theme_in_report.py +++ b/app/main/checks/report_checks/find_theme_in_report.py @@ -2,11 +2,6 @@ import string from ..base_check import BaseReportCriterion, answer -# from .find_def_sld import FindDefSld -# from app.nlp.stemming import Stemming -from ...reports.pdf_document.pdf_document_manager import PdfDocumentManager -import pdfplumber -from ...reports.docx_uploader import DocxUploader import string import nltk @@ -23,12 +18,13 @@ class FindThemeInReport(BaseReportCriterion): description = "Проверка упоминания темы в отчете" id = 'theme_in_report_check' - def __init__(self, file_info): + def __init__(self, file_info, limit = 40): super().__init__(file_info) self.intro = {} self.chapters = [] self.text_par = [] self.full_text = set() + self.limit = limit def late_init(self): self.chapters = self.file.make_chapters(self.file_type['report_type']) @@ -47,22 +43,22 @@ def check(self): par = intro_par['text'].lower() self.text_par.append(par) lemma_theme = self.find_theme() - for i in self.text_par: + + for text in self.text_par: translator = str.maketrans('', '', string.punctuation) - theme_without_punct = i.translate(translator) + theme_without_punct = text.translate(translator) word_in_text = word_tokenize(theme_without_punct) lemma_text = {MORPH_ANALYZER.parse(w)[0].normal_form for w in word_in_text if w.lower() not in stop_words} self.full_text.update(lemma_text) intersection = lemma_theme.intersection(self.full_text) - int_pr = round(len(intersection)*100//len(lemma_theme)) - - return answer(True, f'{lemma_theme} {intersection} hhh {int_pr}') - - - - - + value_intersection = round(len(intersection)*100//len(lemma_theme)) + if value_intersection == 0: + return answer(False, f"Не пройдена! В отчете не упоминаются слова, завяленные в теме отчета.") + elif 1 < value_intersection < self.limit: + return answer(False, f"Не пройдена! Процент упоминания темы в вашем отчете ({value_intersection} %) ниже требуемого ({self.limit} %).") + else: + return answer (True, f'Пройдена! Процент упоминания темы в ответе: {value_intersection} %.') def find_theme(self): stop_words = set(stopwords.words("russian")) @@ -77,74 +73,4 @@ def find_theme(self): list_theme = list_full[start:end] lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if word not in stop_words} - return lemma_theme - - - - - - # full_text_pre = self.file.pdf_file.text_on_page - # full_text = ''.jo - # start_text = full_text.index['1.'] - # end_text = full_text.index['ЗАКЛЮЧЕНИЕ'] - # text_for_analys = full_text[start_text:end_text] - # lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in text_for_analys if word not in stop_words} - - # for text_on_page in self.file.pdf_file.get_text_on_page().values(): - # - # lower_text = text_on_page.lower() - # text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation)) - # list_full = text_without_punct.split() - # start = list_full.index('тема') - # end = list_full.index('студент') - # list_theme = list_full[start:end] - # lemma_theme = ({MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if - # word not in stop_words}) - - - - - - -# class FindThemeInReport(BaseReportCriterion): -# -# description = "Проверка упоминания темы в отчете" -# id = 'theme_in_report_check' -# -# def __init__(self, file_info): -# super().__init__(file_info) -# self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение") -# -# def check(self): -# -# stop_words = set(stopwords.words("russian")) -# -# self.check_conclusion.check() -# page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs"))) -# -# text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion] -# theme = ''.join(word for word in text_from_title[0]) -# -# translator = str.maketrans('', '', string.punctuation) -# theme_without_punct = theme.translate(translator) -# words_in_theme = word_tokenize(theme_without_punct) -# # for word in words_in_theme: -# lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words} -# -# -# text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1] -# string_from_text = ''.join(text_from_slide) -# -# text_without_punct = string_from_text.translate(translator) -# words_in_text = word_tokenize(text_without_punct) -# -# lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words} -# -# intersection = round(len(lemma_theme.intersection(lemma_text))//len(lemma_theme))*100 -# -# if intersection == 0: -# return answer(False, f"Не пройдена! {intersection}") -# elif 1 < intersection < 40: -# return answer(False, f"Обратите внимание! {intersection} %") -# else: -# return answer (True, f'Пройдена! {intersection} %') + return lemma_theme \ No newline at end of file From 116c5e803d2c30e582c2e8d98716b5445f97f452 Mon Sep 17 00:00:00 2001 From: Marina Date: Thu, 13 Jul 2023 18:20:22 +0300 Subject: [PATCH 04/28] optimize with add found_index --- .../checks/presentation_checks/find_def_sld.py | 4 ++++ .../presentation_checks/find_theme_in_pres.py | 15 ++++++++------- app/main/presentations/odp/presentation_odp.py | 1 + app/main/presentations/pptx/presentation_pptx.py | 2 ++ 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/app/main/checks/presentation_checks/find_def_sld.py b/app/main/checks/presentation_checks/find_def_sld.py index 33a66ff5..fbb68ad8 100644 --- a/app/main/checks/presentation_checks/find_def_sld.py +++ b/app/main/checks/presentation_checks/find_def_sld.py @@ -19,6 +19,10 @@ def check(self): if len(found_slides) == 0: return answer(False, 'Слайд не найден') else: + if self.type_of_slide == 'Заключение': + self.file.found_index['Заключение'] = ''.join(str(item) for item in self.found_idxs) + else: + self.file.found_index['Заключение'] = None found_idxs_link = self.format_page_link(self.found_idxs) return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs_link)))) diff --git a/app/main/checks/presentation_checks/find_theme_in_pres.py b/app/main/checks/presentation_checks/find_theme_in_pres.py index a0dd583c..51518946 100644 --- a/app/main/checks/presentation_checks/find_theme_in_pres.py +++ b/app/main/checks/presentation_checks/find_theme_in_pres.py @@ -18,27 +18,28 @@ class FindThemeInPres(BasePresCriterion): description = "Проверка упоминания темы в презентации" id = 'theme_in_pres_check' - def __init__(self, file_info, limit = 40): + def __init__(self, file_info, limit = 60): super().__init__(file_info) - self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение") + # self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение") self.limit = limit def check(self): stop_words = set(stopwords.words("russian")) + if self.file.found_index['Заключение'] is not None: + page_conclusion = self.file.found_index['Заключение'] - self.check_conclusion.check() - page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs"))) + # self.check_conclusion.check() + # page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs"))) - text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion] - theme = ''.join(word for word in text_from_title[0]) + text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion] + theme = ''.join(word for word in text_from_title[0]) translator = str.maketrans('', '', string.punctuation) theme_without_punct = theme.translate(translator) words_in_theme = word_tokenize(theme_without_punct) lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words} - text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1] string_from_text = ''.join(text_from_slide) diff --git a/app/main/presentations/odp/presentation_odp.py b/app/main/presentations/odp/presentation_odp.py index bb3a66d5..90346696 100644 --- a/app/main/presentations/odp/presentation_odp.py +++ b/app/main/presentations/odp/presentation_odp.py @@ -12,6 +12,7 @@ def __init__(self, presentation_name): self.prs = opendocument.load(presentation_name) self.parse_styles() self.add_slides() + self.found_index = {} def add_slides(self): for slide in self.prs.getElementsByType(draw.Page): diff --git a/app/main/presentations/pptx/presentation_pptx.py b/app/main/presentations/pptx/presentation_pptx.py index 869846e0..b01d0b40 100644 --- a/app/main/presentations/pptx/presentation_pptx.py +++ b/app/main/presentations/pptx/presentation_pptx.py @@ -9,6 +9,8 @@ def __init__(self, presentation_name): PresentationBasic.__init__(self, presentation_name) self.prs = Presentation(presentation_name) self.add_slides() + self.found_index = {} + def add_slides(self): for index, slide in enumerate(self.prs.slides, 1): From 5523f0a4b12941e66d830e1c9f054ac27955b643 Mon Sep 17 00:00:00 2001 From: Marina Date: Tue, 1 Aug 2023 16:48:48 +0300 Subject: [PATCH 05/28] fix page_count (without pril) --- .../headers_at_page_top_check.py | 2 +- .../checks/report_checks/image_share_check.py | 4 +- .../report_checks/literature_references.py | 2 +- .../pdf_document/pdf_document_manager.py | 38 ++++++++++--------- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/app/main/checks/report_checks/headers_at_page_top_check.py b/app/main/checks/report_checks/headers_at_page_top_check.py index 4be9ad21..1fb728ae 100644 --- a/app/main/checks/report_checks/headers_at_page_top_check.py +++ b/app/main/checks/report_checks/headers_at_page_top_check.py @@ -25,7 +25,7 @@ def check(self): if self.file_type["report_type"] == 'LR': for header in self.headers: found = False - for page_num in range(1, self.pdf.page_count): + for page_num in range(1, self.pdf.page_count_all): lines = self.pdf.text_on_page[page_num + 1].split("\n") last_header_line = 0 collected_text = "" diff --git a/app/main/checks/report_checks/image_share_check.py b/app/main/checks/report_checks/image_share_check.py index 7929816f..2f68b39a 100644 --- a/app/main/checks/report_checks/image_share_check.py +++ b/app/main/checks/report_checks/image_share_check.py @@ -11,8 +11,8 @@ def __init__(self, file_info, limit=0.3): def check(self): if self.file.page_counter() < 4: return answer(False, "В отчете недостаточно страниц. Нечего проверять.") - images_height = self.file.pdf_file.page_images() - available_space = self.file.pdf_file.page_height() + images_height = self.file.pdf_file.page_images(page_without_pril=self.file.count) + available_space = self.file.pdf_file.page_height(page_without_pril=self.file.count) images_value = images_height/available_space diff --git a/app/main/checks/report_checks/literature_references.py b/app/main/checks/report_checks/literature_references.py index b4ed3335..8ecf1c1d 100644 --- a/app/main/checks/report_checks/literature_references.py +++ b/app/main/checks/report_checks/literature_references.py @@ -129,7 +129,7 @@ def count_sources(self): def search_literature_start_pdf(self): start_page = 0 - end_page = self.file.pdf_file.page_count + end_page = self.file.pdf_file.page_count_all for i in self.file.pdf_file.text_on_page.keys(): lowercase_str = self.file.pdf_file.text_on_page[i].lower() if re.search(self.name_pattern, lowercase_str): diff --git a/app/main/reports/pdf_document/pdf_document_manager.py b/app/main/reports/pdf_document/pdf_document_manager.py index d5679653..45c952d5 100644 --- a/app/main/reports/pdf_document/pdf_document_manager.py +++ b/app/main/reports/pdf_document/pdf_document_manager.py @@ -1,5 +1,5 @@ -import pdfplumber +# import pdfplumber import fitz @@ -8,38 +8,42 @@ class PdfDocumentManager: def __init__(self, path_to_file, pdf_filepath=''): if not pdf_filepath: - self.pdf_file = pdfplumber.open(convert_to(path_to_file, target_format='pdf')) - self.pdf_fitz = fitz.open(convert_to(path_to_file, target_format='pdf')) + # self.pdf_file = pdfplumber.open(convert_to(path_to_file, target_format='pdf')) + self.pdf_file = fitz.open(convert_to(path_to_file, target_format='pdf')) else: - self.pdf_file = pdfplumber.open(pdf_filepath) - self.pdf_fitz = fitz.open(pdf_filepath) - self.pages = self.pdf_file.pages - self.page_count = len(self.pages) + # self.pdf_file = pdfplumber.open(pdf_filepath) + self.pdf_file = fitz.open(pdf_filepath) + self.pages = [self.pdf_file.load_page(page_num) for page_num in range(self.pdf_file.page_count)] + self.page_count_all = self.pdf_file.page_count + # self.page_count = len(self.pages) + # self.pages = self.pdf_file.pages self.text_on_page = self.get_text_on_page() # self.bboxes = [] # self.only_text_on_page = {} def get_text_on_page(self): - return {page + 1: self.pages[page].extract_text() for page in range(self.page_count)} + return {page_num + 1: page.get_text() for page_num, page in enumerate(self.pages)} - def page_images(self): + # def get_text_on_page(self): + # return {page + 1: self.pages[page].extract_text() for page in range(self.page_count_all)} + + def page_images(self, page_without_pril): total_height = 0 - for page_num in range(self.page_count): - page = self.pdf_fitz[page_num] - images = self.pdf_fitz.get_page_images(page) + for page_num in range(page_without_pril): + page = self.pdf_file[page_num] + images = self.pdf_file.get_page_images(page) for image in images: image_coord = page.get_image_bbox(image[7], transform=0) total_height += (image_coord[3] - image_coord[1]) return total_height - def page_height(self): - page = self.pdf_fitz[0] # get first page as a sample + def page_height(self, page_without_pril): + page = self.pdf_file[0] # get first page as a sample page_rect = page.rect - height = page_rect.height - top_margin = page_rect.y0 + height, top_margin = page_rect.height, page_rect.y0 bottom_margin = height - page_rect.y1 - available_space = (height - top_margin - bottom_margin)*self.page_count + available_space = (height - top_margin - bottom_margin)*page_without_pril return available_space From 488e8cd65d8bb134a7dffdf7ce63bd7cb74258cd Mon Sep 17 00:00:00 2001 From: Marina Date: Wed, 8 Nov 2023 12:29:27 +0300 Subject: [PATCH 06/28] fix conflicts --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index afe3a243..46b14691 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,10 +25,8 @@ docx2python~=2.0.4 oauthlib~=3.1.0 pdfplumber==0.6.1 pytest~=7.1.2 - PyMuPDF~=1.22.5 PyPDF2~=3.0.1 - configparser~=5.3.0 pytz~=2023.3 urllib3~=2.0.3 \ No newline at end of file From 9ff54aabb0dfbf9cfc9cbe40a1faec72bdbc56ea Mon Sep 17 00:00:00 2001 From: Marina Date: Wed, 8 Nov 2023 13:03:27 +0300 Subject: [PATCH 07/28] fix mistakes --- app/main/check_packs/pack_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index df7ef82c..b7456f0b 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -15,7 +15,7 @@ ['pres_image_share'], ['future_dev'], ['pres_banned_words_check'], - ['pres_empty_slide'],, + ['pres_empty_slide'], ['theme_in_pres_check'], ] BASE_REPORT_CRITERION = [ From d5b081def70ae21192e4ccaef9dc7b04550305a4 Mon Sep 17 00:00:00 2001 From: Anton Toropygin Date: Thu, 23 Nov 2023 15:32:22 +0300 Subject: [PATCH 08/28] max_abstract_init --- app/main/check_packs/pack_config.py | 3 ++- app/main/checks/__init__.py | 3 ++- app/main/checks/report_checks/__init__.py | 2 +- .../checks/report_checks/max_abstract_size_check.py | 12 ++++++++++++ 4 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 app/main/checks/report_checks/max_abstract_size_check.py diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 0639689b..46ba2693 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -39,7 +39,8 @@ ["header_check"], ["report_section_component"], ["main_text_check"], - ["spelling_check"] + ["spelling_check"], + ["max_abstract_size_check"], ] DEFAULT_TYPE = 'pres' diff --git a/app/main/checks/__init__.py b/app/main/checks/__init__.py index 8e643b62..d6ad8daa 100644 --- a/app/main/checks/__init__.py +++ b/app/main/checks/__init__.py @@ -37,6 +37,7 @@ ReportChapters.id: ReportChapters, ReportSectionComponent.id: ReportSectionComponent, ReportMainTextCheck.id: ReportMainTextCheck, - SpellingCheck.id: SpellingCheck + SpellingCheck.id: SpellingCheck, + ReportMaxSizeOfAbstractCheck.id: ReportMaxSizeOfAbstractCheck, } } diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py index 61a4f8a2..114d8703 100644 --- a/app/main/checks/report_checks/__init__.py +++ b/app/main/checks/report_checks/__init__.py @@ -21,4 +21,4 @@ from .sections_check import LRReportSectionCheck from .style_check import ReportStyleCheck from .spelling_check import SpellingCheck - +from .max_abstract_size_check import ReportMaxSizeOfAbstractCheck diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py new file mode 100644 index 00000000..47f6b07a --- /dev/null +++ b/app/main/checks/report_checks/max_abstract_size_check.py @@ -0,0 +1,12 @@ +from app.main.checks.base_check import BaseReportCriterion + + +class ReportMaxSizeOfAbstractCheck(BaseReportCriterion): + description = "Максимальный размер раздела Реферат в ВКР" + id = "max_abstract_size_check" + + def __init__(self, file_info): + super().__init__(file_info) + + def check(self): + return "123123" \ No newline at end of file From c834768d8f44452aa7579e1ef76da1fe97015da2 Mon Sep 17 00:00:00 2001 From: Anton Toropygin Date: Fri, 24 Nov 2023 18:04:19 +0300 Subject: [PATCH 09/28] fix-import --- app/main/checks/report_checks/max_abstract_size_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py index 47f6b07a..6e62f27f 100644 --- a/app/main/checks/report_checks/max_abstract_size_check.py +++ b/app/main/checks/report_checks/max_abstract_size_check.py @@ -1,4 +1,4 @@ -from app.main.checks.base_check import BaseReportCriterion +from ..base_check import BaseReportCriterion, answer class ReportMaxSizeOfAbstractCheck(BaseReportCriterion): @@ -9,4 +9,4 @@ def __init__(self, file_info): super().__init__(file_info) def check(self): - return "123123" \ No newline at end of file + return answer(True, "123123") \ No newline at end of file From 143b5becaf2192ad667f58cf7dd761f00e5edc7a Mon Sep 17 00:00:00 2001 From: Anton Toropygin Date: Mon, 27 Nov 2023 00:24:02 +0300 Subject: [PATCH 10/28] max-size-done --- .../report_checks/max_abstract_size_check.py | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py index 6e62f27f..d6195565 100644 --- a/app/main/checks/report_checks/max_abstract_size_check.py +++ b/app/main/checks/report_checks/max_abstract_size_check.py @@ -7,6 +7,32 @@ class ReportMaxSizeOfAbstractCheck(BaseReportCriterion): def __init__(self, file_info): super().__init__(file_info) + self.headers = [] + self.max_size = 0 + + def late_init(self): + self.headers = self.file.make_headers(self.file_type['report_type']) + self.max_size = 1 def check(self): - return answer(True, "123123") \ No newline at end of file + self.late_init() + referat_page = 0 + abstract_page = 0 + main_page = 0 + for header in self.headers: + if header["name"] == "Реферат": + referat_page = header["page"] + if header["name"] == "Abstract": + abstract_page = header["page"] + if header["name"] == "Содержание": + main_page = header["page"] + referat_size = abstract_page - referat_page + abstract_size = main_page - abstract_page + if referat_size > self.max_size: + return answer(False, + f"

Размер раздела \"Реферат\" равен {referat_size} страницы, должен быть {self.max_size}") + if abstract_size > self.max_size: + return answer(False, + f"

Размер раздела \"Abstract\" равен {abstract_size} страницы, должен быть {self.max_size}") + return answer(True, + f"

Размеры разделов \"Реферат\" и \"Abstract\" соответствуют шаблону") From f9a8dd62dd7ee2625461d8dcd57cfed46ac70c26 Mon Sep 17 00:00:00 2001 From: Anton Toropygin Date: Mon, 4 Dec 2023 22:35:05 +0300 Subject: [PATCH 11/28] add-check-for-both --- .../report_checks/max_abstract_size_check.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py index d6195565..fbeaf8bb 100644 --- a/app/main/checks/report_checks/max_abstract_size_check.py +++ b/app/main/checks/report_checks/max_abstract_size_check.py @@ -8,14 +8,13 @@ class ReportMaxSizeOfAbstractCheck(BaseReportCriterion): def __init__(self, file_info): super().__init__(file_info) self.headers = [] + self.referat_size = 0 + self.abstract_size = 0 self.max_size = 0 def late_init(self): self.headers = self.file.make_headers(self.file_type['report_type']) self.max_size = 1 - - def check(self): - self.late_init() referat_page = 0 abstract_page = 0 main_page = 0 @@ -26,13 +25,19 @@ def check(self): abstract_page = header["page"] if header["name"] == "Содержание": main_page = header["page"] - referat_size = abstract_page - referat_page - abstract_size = main_page - abstract_page - if referat_size > self.max_size: + self.referat_size = abstract_page - referat_page + self.abstract_size = main_page - abstract_page + + def check(self): + self.late_init() + if self.referat_size > self.max_size and self.abstract_size > self.max_size: + return answer(False, + f"

Размеры разделов \"Реферат\" и \"Abstract\" превышает максимальный размер") + if self.referat_size > self.max_size: return answer(False, - f"

Размер раздела \"Реферат\" равен {referat_size} страницы, должен быть {self.max_size}") - if abstract_size > self.max_size: + f"

Размер раздела \"Реферат\" равен {self.referat_size} страницы, должен быть {self.max_size}") + if self.abstract_size > self.max_size: return answer(False, - f"

Размер раздела \"Abstract\" равен {abstract_size} страницы, должен быть {self.max_size}") + f"

Размер раздела \"Abstract\" равен {self.abstract_size} страницы, должен быть {self.max_size}") return answer(True, f"

Размеры разделов \"Реферат\" и \"Abstract\" соответствуют шаблону") From 32d89bcbb51338f14e84d603dc275371bece0a05 Mon Sep 17 00:00:00 2001 From: Marina Date: Thu, 18 Apr 2024 21:11:35 +0300 Subject: [PATCH 12/28] base for reload /results --- app/server.py | 8 ++++++-- app/templates/results.html | 3 +++ assets/scripts/results.js | 42 +++++++++++++++++++++++++++++++++----- 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/app/server.py b/app/server.py index 20434a16..b4b611dc 100644 --- a/app/server.py +++ b/app/server.py @@ -265,10 +265,14 @@ def recheck(check_id): @login_required def get_status(task_id): task_result = AsyncResult(task_id) + task = ObjectId(task_id) + check = db_methods.get_check(task) + complete_task = check.is_ended result = { "task_id": task_id, "task_status": task_result.status, - "task_result": task_result.result + "task_result": task_result.result, + "complete_task": complete_task } return jsonify(result), 200 @@ -285,7 +289,7 @@ def results(_id): avg_process_time = None if check.is_ended else db_methods.get_average_processing_time() return render_template("./results.html", navi_upload=True, results=check, columns=TABLE_COLUMNS, avg_process_time=avg_process_time, - stats=format_check(check.pack())) + stats=format_check(check.pack()), task_id = _id) else: logger.info("Запрошенная проверка не найдена: " + _id) return render_template("./404.html") diff --git a/app/templates/results.html b/app/templates/results.html index 548ea233..11bcf03a 100644 --- a/app/templates/results.html +++ b/app/templates/results.html @@ -6,6 +6,9 @@ {% block title %}Результаты проверки{% endblock %} {% block main %} +
{% include "header.html" %}
diff --git a/assets/scripts/results.js b/assets/scripts/results.js index 1de5eab7..7dd16833 100644 --- a/assets/scripts/results.js +++ b/assets/scripts/results.js @@ -17,7 +17,7 @@ const renderPage = num => { pageIsRendering = true; pdfDoc.getPage(num).then(page => { - const viewport = page.getViewport({scale}); + const viewport = page.getViewport({ scale }); canvas.height = viewport.height; canvas.width = viewport.width; @@ -89,14 +89,46 @@ if ($("#pdf_download").length !== 0) { pdfjsLib .getDocument(href) .promise.then(pdfDoc_ => { - pdfDoc = pdfDoc_; + pdfDoc = pdfDoc_; - $('#page-count')[0].textContent = pdfDoc.numPages; - renderPage(pageNum); - }); + $('#page-count')[0].textContent = pdfDoc.numPages; + renderPage(pageNum); + }); $('#prev-page').click(showPrevPage); $('#next-page').click(showNextPage); } $('#showAllVerdicts').click(toggleAllVerdicts); + + +// function for automatic reload page after checking: +var reloaded = true + +function checkStatus() { + const intervalId = setInterval(() => { + var request = new XMLHttpRequest(); + request.open('GET', '/tasks/' + task_id, true); + request.onreadystatechange = function () { + if (request.readyState === XMLHttpRequest.DONE) { + if (request.status === 200) { + var response = JSON.parse(request.responseText); + if (response.complete_task && reloaded) { + clearInterval(intervalId); + return; + } else { + reloaded = false + if (response.complete_task) { + window.location.href = '/results/' + task_id; + } + } + } else { + console.error('Request failed:', request.status); + } + } + }; + request.send(); + }, 5000); +} + +checkStatus(); From 334baad749658e1557bb8d7797537393e1e3ae0d Mon Sep 17 00:00:00 2001 From: Marina Date: Thu, 18 Apr 2024 21:15:43 +0300 Subject: [PATCH 13/28] message abt time is changed --- app/templates/results.html | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/templates/results.html b/app/templates/results.html index 11bcf03a..d5df46cd 100644 --- a/app/templates/results.html +++ b/app/templates/results.html @@ -24,8 +24,7 @@

{% endif %} {% else %}

- Производится проверка файла. Примерное время: {{ avg_process_time }} секунд (перезагрузите - страницу) + Производится проверка файла, страница перезагрузится автоматически. Примерное время: {{ avg_process_time }}

{% endif %} From abf76bd3e928c605ab25ab28848c943b02c74ff5 Mon Sep 17 00:00:00 2001 From: Marina Date: Thu, 18 Apr 2024 22:10:57 +0300 Subject: [PATCH 14/28] process time is changed --- app/db/db_methods.py | 12 +++++++----- app/templates/results.html | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/app/db/db_methods.py b/app/db/db_methods.py index b372d607..590098c9 100644 --- a/app/db/db_methods.py +++ b/app/db/db_methods.py @@ -407,11 +407,13 @@ def mark_celery_task_as_finished(celery_task_id, finished_time=None): '$set': {'finished_at': finished_time, 'processing_time': (finished_time - celery_task['started_at']).total_seconds()}}) - -def get_average_processing_time(min_time=5.0, limit=10): - # TODO: use only success check (failed checks processing time is more bigger than normal) - result = list(celery_check_collection.aggregate( - [{'$limit': limit}, {'$group': {'_id': None, 'avg_processing_time': {'$avg': "$processing_time"}}}])) +def get_average_processing_time(min_time=5.0, limit=100000): + # use only success check (failed checks processing time is more bigger than normal) + result = list(celery_check_collection.aggregate([ + {'$sample': {'size': limit}}, + {'$match': {'processing_time': {'$lt': 200}}}, + {'$group': {'_id': None, 'avg_processing_time': {'$avg': "$processing_time"}}} + ])) if result and result[0]['avg_processing_time']: result = result[0]['avg_processing_time'] if result > min_time: diff --git a/app/templates/results.html b/app/templates/results.html index d5df46cd..d71879d2 100644 --- a/app/templates/results.html +++ b/app/templates/results.html @@ -24,7 +24,7 @@

{% endif %} {% else %}

- Производится проверка файла, страница перезагрузится автоматически. Примерное время: {{ avg_process_time }} + Производится проверка файла, страница перезагрузится автоматически. Примерное время: {{ avg_process_time }} сек.

{% endif %} From e39164bb016f716669a9fade08d4581e92cfde3d Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 19 Apr 2024 20:02:36 +0300 Subject: [PATCH 15/28] uodate get_average_processing_time: rm limit, set max time to 170 --- app/db/db_methods.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/db/db_methods.py b/app/db/db_methods.py index 590098c9..ad903e95 100644 --- a/app/db/db_methods.py +++ b/app/db/db_methods.py @@ -407,11 +407,10 @@ def mark_celery_task_as_finished(celery_task_id, finished_time=None): '$set': {'finished_at': finished_time, 'processing_time': (finished_time - celery_task['started_at']).total_seconds()}}) -def get_average_processing_time(min_time=5.0, limit=100000): +def get_average_processing_time(min_time=5.0): # use only success check (failed checks processing time is more bigger than normal) result = list(celery_check_collection.aggregate([ - {'$sample': {'size': limit}}, - {'$match': {'processing_time': {'$lt': 200}}}, + {'$match': {'processing_time': {'$lt': 170}}}, {'$group': {'_id': None, 'avg_processing_time': {'$avg': "$processing_time"}}} ])) if result and result[0]['avg_processing_time']: From b4201162efca9c69ebc1b4535d775eed2b1a911f Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Apr 2024 18:18:42 +0300 Subject: [PATCH 16/28] revert get_status changes --- app/server.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/app/server.py b/app/server.py index b4b611dc..0634be39 100644 --- a/app/server.py +++ b/app/server.py @@ -265,14 +265,10 @@ def recheck(check_id): @login_required def get_status(task_id): task_result = AsyncResult(task_id) - task = ObjectId(task_id) - check = db_methods.get_check(task) - complete_task = check.is_ended result = { "task_id": task_id, "task_status": task_result.status, "task_result": task_result.result, - "complete_task": complete_task } return jsonify(result), 200 @@ -289,7 +285,7 @@ def results(_id): avg_process_time = None if check.is_ended else db_methods.get_average_processing_time() return render_template("./results.html", navi_upload=True, results=check, columns=TABLE_COLUMNS, avg_process_time=avg_process_time, - stats=format_check(check.pack()), task_id = _id) + stats=format_check(check.pack())) else: logger.info("Запрошенная проверка не найдена: " + _id) return render_template("./404.html") From 855591f0f39146888d418bd5e27f989b5a831928 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Apr 2024 18:24:14 +0300 Subject: [PATCH 17/28] add new api route for check ready result --- app/server.py | 12 ++ app/templates/results.html | 4 - assets/scripts/results.js | 258 +++++++++++++++++++------------------ 3 files changed, 144 insertions(+), 130 deletions(-) diff --git a/app/server.py b/app/server.py index 0634be39..8a88dd7d 100644 --- a/app/server.py +++ b/app/server.py @@ -290,6 +290,18 @@ def results(_id): logger.info("Запрошенная проверка не найдена: " + _id) return render_template("./404.html") + +@app.route("/api/results/ready/", methods=["GET"]) +def ready_result(_id): + try: + oid = ObjectId(_id) + except bson.errors.InvalidId: + logger.error('_id exception:', exc_info=True) + return {} + check = db_methods.get_check(oid) + if check is not None: + return {"is_ended": check.is_ended} + @app.route("/checks/", methods=["GET"]) @login_required diff --git a/app/templates/results.html b/app/templates/results.html index d75c2d1d..8f05f6f4 100644 --- a/app/templates/results.html +++ b/app/templates/results.html @@ -6,10 +6,6 @@ {% block title %}Результаты проверки{% endblock %} {% block main %} - -
{% include "header.html" %}
{% if results.is_ended %} diff --git a/assets/scripts/results.js b/assets/scripts/results.js index 7dd16833..408581e8 100644 --- a/assets/scripts/results.js +++ b/assets/scripts/results.js @@ -2,133 +2,139 @@ import '../styles/results.css'; import * as pdfjsLib from 'pdfjs-dist'; import pdfjsWorker from "pdfjs-dist/build/pdf.worker.entry"; -let pdfDoc, - pageNum, - pageIsRendering, - pageNumIsPending, - scale, - canvas, - ctx, - currentPage; - -pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsWorker; - -const renderPage = num => { - pageIsRendering = true; - - pdfDoc.getPage(num).then(page => { - const viewport = page.getViewport({ scale }); - canvas.height = viewport.height; - canvas.width = viewport.width; - - const renderCtx = { - canvasContext: ctx, - viewport - }; - - page.render(renderCtx).promise.then(() => { - pageIsRendering = false; - - if (pageNumIsPending !== null) { - renderPage(pageNumIsPending); - pageNumIsPending = null; - } - }); - - $('#page-num')[0].textContent = num; - }); -}; - -const queueRenderPage = num => { - if (pageIsRendering) { - pageNumIsPending = num; - } else { - renderPage(num); - } -}; - -const showPrevPage = () => { - if (pageNum <= 1) { - return; - } - pageNum--; - queueRenderPage(pageNum); -}; - -const showNextPage = () => { - if (pageNum >= pdfDoc.numPages) { - return; - } - pageNum++; - queueRenderPage(pageNum); -}; - -const toggleAllVerdicts = () => { - $('.accordian-body').collapse('toggle'); -}; - -if ($("#pdf_download").length !== 0) { - var href = $("#pdf_download").attr('href'); - pdfDoc = null; - pageNum = 1; - pageIsRendering = false, - pageNumIsPending = null; - scale = 1.1; - canvas = $("#the-canvas")[0]; - ctx = canvas.getContext("2d"); - var href = $("#pdf_download").attr('href'); - pdfDoc = null; - pageNum = 1; - pageIsRendering = false, - pageNumIsPending = null; - scale = 1.1; - - canvas = document.getElementById('the-canvas'); - ctx = canvas.getContext('2d'); - - pdfjsLib - .getDocument(href) - .promise.then(pdfDoc_ => { - pdfDoc = pdfDoc_; - - $('#page-count')[0].textContent = pdfDoc.numPages; - renderPage(pageNum); - }); - - $('#prev-page').click(showPrevPage); - $('#next-page').click(showNextPage); -} - -$('#showAllVerdicts').click(toggleAllVerdicts); - - -// function for automatic reload page after checking: -var reloaded = true - -function checkStatus() { - const intervalId = setInterval(() => { - var request = new XMLHttpRequest(); - request.open('GET', '/tasks/' + task_id, true); - request.onreadystatechange = function () { - if (request.readyState === XMLHttpRequest.DONE) { - if (request.status === 200) { - var response = JSON.parse(request.responseText); - if (response.complete_task && reloaded) { - clearInterval(intervalId); - return; - } else { - reloaded = false - if (response.complete_task) { - window.location.href = '/results/' + task_id; +$(function(){ + if($("#stats_table").length > 0){ + let pdfDoc, + pageNum, + pageIsRendering, + pageNumIsPending, + scale, + canvas, + ctx, + currentPage; + + pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsWorker; + + const renderPage = num => { + pageIsRendering = true; + + pdfDoc.getPage(num).then(page => { + const viewport = page.getViewport({ scale }); + canvas.height = viewport.height; + canvas.width = viewport.width; + + const renderCtx = { + canvasContext: ctx, + viewport + }; + + page.render(renderCtx).promise.then(() => { + pageIsRendering = false; + + if (pageNumIsPending !== null) { + renderPage(pageNumIsPending); + pageNumIsPending = null; } - } + }); + + $('#page-num')[0].textContent = num; + }); + }; + + const queueRenderPage = num => { + if (pageIsRendering) { + pageNumIsPending = num; } else { - console.error('Request failed:', request.status); + renderPage(num); + } + }; + + const showPrevPage = () => { + if (pageNum <= 1) { + return; } + pageNum--; + queueRenderPage(pageNum); + }; + + const showNextPage = () => { + if (pageNum >= pdfDoc.numPages) { + return; + } + pageNum++; + queueRenderPage(pageNum); + }; + + const toggleAllVerdicts = () => { + $('.accordian-body').collapse('toggle'); + }; + + if ($("#pdf_download").length !== 0) { + var href = $("#pdf_download").attr('href'); + pdfDoc = null; + pageNum = 1; + pageIsRendering = false, + pageNumIsPending = null; + scale = 1.1; + canvas = $("#the-canvas")[0]; + ctx = canvas.getContext("2d"); + var href = $("#pdf_download").attr('href'); + pdfDoc = null; + pageNum = 1; + pageIsRendering = false, + pageNumIsPending = null; + scale = 1.1; + + canvas = document.getElementById('the-canvas'); + ctx = canvas.getContext('2d'); + + pdfjsLib + .getDocument(href) + .promise.then(pdfDoc_ => { + pdfDoc = pdfDoc_; + + $('#page-count')[0].textContent = pdfDoc.numPages; + renderPage(pageNum); + }); + + $('#prev-page').click(showPrevPage); + $('#next-page').click(showNextPage); } - }; - request.send(); - }, 5000); -} - -checkStatus(); + + $('#showAllVerdicts').click(toggleAllVerdicts); + + // function for automatic reload page after checking: + let reloaded = true + + function checkStatus() { + const intervalId = setInterval(() => { + let request = new XMLHttpRequest(); + const check_id = window.location.pathname.substr(window.location.pathname.lastIndexOf('/') + 1); + request.open('GET', '/api/results/ready/' + check_id, true); + request.onreadystatechange = function () { + if (request.readyState === XMLHttpRequest.DONE) { + if (request.status === 200) { + let response = JSON.parse(request.responseText); + console.log(response.is_ended) + if (response.is_ended && reloaded) { + clearInterval(intervalId); + return; + } else { + reloaded = false + if (response.is_ended) { + window.location.href = '/results/' + check_id; + } + } + } else { + console.error('Request failed:', request.status); + clearInterval(intervalId); + } + } + }; + request.send(); + }, 5000); + } + + checkStatus(); + } +}); From a2ad613af841eb6a35d7254d0a249f6425695e3e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Apr 2024 18:44:14 +0300 Subject: [PATCH 18/28] preventing late request and early end of check --- assets/scripts/results.js | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/assets/scripts/results.js b/assets/scripts/results.js index 408581e8..831f5dab 100644 --- a/assets/scripts/results.js +++ b/assets/scripts/results.js @@ -106,9 +106,8 @@ $(function(){ // function for automatic reload page after checking: let reloaded = true - function checkStatus() { - const intervalId = setInterval(() => { - let request = new XMLHttpRequest(); + function checkStatus(end_check_function){ + let request = new XMLHttpRequest(); const check_id = window.location.pathname.substr(window.location.pathname.lastIndexOf('/') + 1); request.open('GET', '/api/results/ready/' + check_id, true); request.onreadystatechange = function () { @@ -117,7 +116,7 @@ $(function(){ let response = JSON.parse(request.responseText); console.log(response.is_ended) if (response.is_ended && reloaded) { - clearInterval(intervalId); + end_check_function(); return; } else { reloaded = false @@ -127,14 +126,20 @@ $(function(){ } } else { console.error('Request failed:', request.status); - clearInterval(intervalId); + end_check_function(); } } }; request.send(); + } + + function recheckStatus() { + const intervalId = setInterval(() => { + checkStatus(() => {clearInterval(intervalId)}); }, 5000); } - checkStatus(); + checkStatus(() => {}); + recheckStatus() } }); From 7ec23d74e30b257330f50e6e79945f55ffdafc78 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Apr 2024 20:31:41 +0300 Subject: [PATCH 19/28] update feedback for FindThemeInReport --- app/main/checks/report_checks/find_theme_in_report.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py index 5edbca1a..38a5873a 100644 --- a/app/main/checks/report_checks/find_theme_in_report.py +++ b/app/main/checks/report_checks/find_theme_in_report.py @@ -53,11 +53,14 @@ def check(self): intersection = lemma_theme.intersection(self.full_text) value_intersection = round(len(intersection)*100//len(lemma_theme)) if value_intersection == 0: - return answer(False, f"Не пройдена! В отчете не упоминаются слова, завяленные в теме отчета.") - elif 1 < value_intersection < self.limit: - return answer(False, f"Не пройдена! Процент упоминания темы в вашем отчете ({value_intersection} %) ниже требуемого ({self.limit} %).") + return answer(False, "Не пройдена! В отчете не упоминаются слова, заявленные в теме отчета.") + elif value_intersection < self.limit: + return answer( + round(value_intersection/self.limit, 1), + f"Частично пройдена! Процент упоминания темы в вашем отчете ({value_intersection} %) ниже требуемого ({self.limit} %)." + ) else: - return answer (True, f'Пройдена! Процент упоминания темы в ответе: {value_intersection} %.') + return answer (True, f'Пройдена! Процент упоминания темы в отчете: {value_intersection} %.') def find_theme(self): stop_words = set(stopwords.words("russian")) From 5c2567bded216f4847475851adf3d9d87db32444 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Apr 2024 21:32:15 +0300 Subject: [PATCH 20/28] add labels to checks (+little fixes for max_size) --- app/main/checks/report_checks/find_theme_in_report.py | 2 +- app/main/checks/report_checks/max_abstract_size_check.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py index 38a5873a..56dd9a00 100644 --- a/app/main/checks/report_checks/find_theme_in_report.py +++ b/app/main/checks/report_checks/find_theme_in_report.py @@ -13,7 +13,7 @@ class FindThemeInReport(BaseReportCriterion): - + label = "Проверка упоминания темы в отчете" description = "Проверка упоминания темы в отчете" id = 'theme_in_report_check' diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py index fbeaf8bb..7cba444e 100644 --- a/app/main/checks/report_checks/max_abstract_size_check.py +++ b/app/main/checks/report_checks/max_abstract_size_check.py @@ -2,19 +2,19 @@ class ReportMaxSizeOfAbstractCheck(BaseReportCriterion): - description = "Максимальный размер раздела Реферат в ВКР" + label = "Максимальный размер раздела Реферат в ВКР" + description = "Максимальный размер раздела Реферат в ВКР (1 стр.)" id = "max_abstract_size_check" - def __init__(self, file_info): + def __init__(self, file_info, max_size=1): super().__init__(file_info) self.headers = [] self.referat_size = 0 self.abstract_size = 0 - self.max_size = 0 + self.max_size = max_size def late_init(self): self.headers = self.file.make_headers(self.file_type['report_type']) - self.max_size = 1 referat_page = 0 abstract_page = 0 main_page = 0 From 6e0121a99fb48469d05f342109ed6fa82a6f89a5 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Apr 2024 21:49:19 +0300 Subject: [PATCH 21/28] split template_name check for pres and report --- .../checks/presentation_checks/__init__.py | 2 +- .../presentation_checks/template_name.py | 2 +- app/main/checks/report_checks/__init__.py | 1 + .../checks/report_checks/template_name.py | 25 +++++++++++++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 app/main/checks/report_checks/template_name.py diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py index 90e517a5..d605c1d3 100644 --- a/app/main/checks/presentation_checks/__init__.py +++ b/app/main/checks/presentation_checks/__init__.py @@ -5,7 +5,7 @@ from .sld_enum import SldEnumCheck from .sld_num import SldNumCheck from .sld_similarity import SldSimilarity -from .template_name import TemplateNameCheck +from .template_name import PresTemplateNameCheck from .title_format import TitleFormatCheck from .pres_right_words import PresRightWordsCheck from .image_share import PresImageShareCheck diff --git a/app/main/checks/presentation_checks/template_name.py b/app/main/checks/presentation_checks/template_name.py index eda96303..f73aa7bb 100644 --- a/app/main/checks/presentation_checks/template_name.py +++ b/app/main/checks/presentation_checks/template_name.py @@ -3,7 +3,7 @@ from ..base_check import BasePresCriterion, answer -class TemplateNameCheck(BasePresCriterion): +class PresTemplateNameCheck(BasePresCriterion): label = "Проверка соответствия названия файла шаблону" description = 'Шаблон названия: "Презентация_ВКР_Иванов", "ПРЕЗЕНТАЦИЯ_НИР_ИВАНОВ"' id = 'template_name' diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py index cb3118de..50729335 100644 --- a/app/main/checks/report_checks/__init__.py +++ b/app/main/checks/report_checks/__init__.py @@ -23,3 +23,4 @@ from .style_check import ReportStyleCheck from .spelling_check import SpellingCheck from .max_abstract_size_check import ReportMaxSizeOfAbstractCheck +from .template_name import ReportTemplateNameCheck \ No newline at end of file diff --git a/app/main/checks/report_checks/template_name.py b/app/main/checks/report_checks/template_name.py new file mode 100644 index 00000000..9b6e88d7 --- /dev/null +++ b/app/main/checks/report_checks/template_name.py @@ -0,0 +1,25 @@ +import re +from datetime import datetime + + +from ..base_check import BasePresCriterion, answer + +CUR_YEAR = datetime.now().year + + +class ReportTemplateNameCheck(BasePresCriterion): + label = "Проверка соответствия названия файла шаблону" + description = f'Шаблон названия: "{CUR_YEAR}ВКР<номер_студ_билета>ФАМИЛИЯ", например "{CUR_YEAR}ВКР111111ИВАНОВ"' + id = 'template_name' + + def __init__(self, file_info, regex=f"{CUR_YEAR}ВКР[0-9]{6}([А-ЯЁ]+)"): + super().__init__(file_info) + self.filename = self.filename.split('.', 1)[0] + self.reg = regex + + def check(self): + if re.fullmatch(self.reg, self.filename): + return answer(True, "Пройдена!") + else: + return answer(False, + f'Название файла презентации "{self.filename}" не соответствует шаблону: {self.reg}') From 10727a8c6d4ab8b89053c29d566cc8281afcdd0e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Apr 2024 22:04:36 +0300 Subject: [PATCH 22/28] update information for ReportMainCharacterCheck --- app/main/checks/report_checks/main_character_check.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/main/checks/report_checks/main_character_check.py b/app/main/checks/report_checks/main_character_check.py index e29fc7ec..723fb536 100644 --- a/app/main/checks/report_checks/main_character_check.py +++ b/app/main/checks/report_checks/main_character_check.py @@ -3,12 +3,12 @@ class ReportMainCharacterCheck(BaseReportCriterion): label = "Проверка фамилии и должности заведующего кафедрой" - description = 'И.о. зав. кафедрой: А.А. Лисс' + description = 'Зав. кафедрой: А.А. Лисс' id = 'main_character_check' priority = True def __init__(self, file_info, main_character_name_right="А.А. Лисс", main_character_name_wrong="К.В. Кринкин", - main_character_job_right="И.о. зав. кафедрой", main_character_job_wrong="Зав. кафедрой"): + main_character_job_right="Зав. кафедрой", main_character_job_wrong="И.о. зав. кафедрой"): super().__init__(file_info) self.headers = [] self.main_character_name_right = main_character_name_right @@ -31,10 +31,10 @@ def check(self): if text_on_page.find(self.main_character_name_wrong) >= 0 and not text_on_page.find( self.main_character_name_right) >= 0: result_str += f"На странице {self.format_page_link([page])} указана неверная фамилия заведующего " \ - f"кафедрой. Убедитесь, что И.о. зав. кафедрой {self.main_character_name_right}.
" + f"кафедрой. Убедитесь, что {self.main_character_job_right} {self.main_character_name_right}.
" elif not text_on_page.find(self.main_character_name_right) >= 0: result_str += f"На странице {self.format_page_link([page])} не указано ФИО заведующего кафедрой, в " \ - f"графе И.о. зав. кафедрой должно быть указано {self.main_character_name_right}.
" + f"графе {self.main_character_job_right} должно быть указано {self.main_character_name_right}.
" if text_on_page.find(self.main_character_job_wrong) >= 0 and not text_on_page.find( self.main_character_job_right) >= 0: result_str += f'На странице {self.format_page_link([page])} указана неверная должность ' \ From 54f777177cd8517faefe8270727285710ee8cf72 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Apr 2024 22:41:36 +0300 Subject: [PATCH 23/28] little update for ReportPageCounter --- app/main/checks/report_checks/page_counter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/main/checks/report_checks/page_counter.py b/app/main/checks/report_checks/page_counter.py index bd4952e4..fda1447d 100755 --- a/app/main/checks/report_checks/page_counter.py +++ b/app/main/checks/report_checks/page_counter.py @@ -3,11 +3,11 @@ class ReportPageCounter(BaseReportCriterion): label = "Проверка количества страниц в файле" - description = 'Количество страниц должно быть больше 50ти, не считая "Приложения"' + description = 'Количество страниц должно быть в допустимых рамках, не считая "Приложения"' id = 'page_counter' priority = True - def __init__(self, file_info, min_number=50, max_number=None): + def __init__(self, file_info, min_number=50, max_number=150): super().__init__(file_info) self.number = [min_number, max_number] From b185e02ad718a89658d4a1772d67489c6f328f02 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 24 Apr 2024 00:08:58 +0300 Subject: [PATCH 24/28] update feedback ReportTemplateNameCheck --- app/main/checks/report_checks/template_name.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main/checks/report_checks/template_name.py b/app/main/checks/report_checks/template_name.py index 9b6e88d7..4e3991eb 100644 --- a/app/main/checks/report_checks/template_name.py +++ b/app/main/checks/report_checks/template_name.py @@ -22,4 +22,4 @@ def check(self): return answer(True, "Пройдена!") else: return answer(False, - f'Название файла презентации "{self.filename}" не соответствует шаблону: {self.reg}') + f'Название файла презентации "{self.filename}" не соответствует шаблону (Пример: {CUR_YEAR}030301ИВАНОВ)') From 0086c64cdd39b11c40e7ab0945a26f0635c8447e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 24 Apr 2024 00:17:56 +0300 Subject: [PATCH 25/28] add final recheck route --- app/server.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/server.py b/app/server.py index 8a88dd7d..9cb6ea69 100644 --- a/app/server.py +++ b/app/server.py @@ -252,13 +252,21 @@ def recheck(check_id): if not check: abort(404) + + # write files (original and pdf) to filestorage filepath = join(UPLOAD_FOLDER, f"{check_id}.{check.filename.rsplit('.', 1)[-1]}") + pdf_filepath = join(UPLOAD_FOLDER, f"{check_id}.pdf") + db_methods.write_file_from_db_file(oid, filepath) + db_methods.write_file_from_db_file(ObjectId(check.conv_pdf_fs_id), pdf_filepath) + check.is_ended = False db_methods.update_check(check) - db_methods.write_file_from_db_file(oid, filepath) task = create_task.delay(check.pack(to_str=True)) # add check to queue db_methods.add_celery_task(task.id, check_id) # mapping celery_task to check (check_id = file_id) - return {'task_id': task.id, 'check_id': check_id} + if request.args.get('api'): + return {'task_id': task.id, 'check_id': check_id} + else: + return redirect(url_for('results', _id=check_id)) @app.route("/tasks/", methods=["GET"]) From 63979c0d3294c98334336e6b0f4a8666ee8c5ea9 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 24 Apr 2024 02:53:03 +0300 Subject: [PATCH 26/28] increase ready check timeout --- assets/scripts/results.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/scripts/results.js b/assets/scripts/results.js index 831f5dab..3f43d45b 100644 --- a/assets/scripts/results.js +++ b/assets/scripts/results.js @@ -136,7 +136,7 @@ $(function(){ function recheckStatus() { const intervalId = setInterval(() => { checkStatus(() => {clearInterval(intervalId)}); - }, 5000); + }, 10000); } checkStatus(() => {}); From 99d368dcaa4685a7bfb91ac9571562ee9028bc17 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 24 Apr 2024 02:53:48 +0300 Subject: [PATCH 27/28] update ReportImageShareCheck --- app/main/checks/report_checks/image_share_check.py | 4 ++-- app/main/reports/docx_uploader/docx_uploader.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/app/main/checks/report_checks/image_share_check.py b/app/main/checks/report_checks/image_share_check.py index 0ef4fa1c..c96e3c1e 100644 --- a/app/main/checks/report_checks/image_share_check.py +++ b/app/main/checks/report_checks/image_share_check.py @@ -12,8 +12,8 @@ def __init__(self, file_info, limit=0.3): def check(self): if self.file.page_counter() < 4: return answer(False, "В отчете недостаточно страниц. Нечего проверять.") - images_height = self.file.pdf_file.page_images(page_without_pril=self.file.count) - available_space = self.file.pdf_file.page_height(page_without_pril=self.file.count) + images_height = self.file.pdf_file.page_images(page_without_pril=self.file.page_count) + available_space = self.file.pdf_file.page_height(page_without_pril=self.file.page_count) images_value = images_height/available_space diff --git a/app/main/reports/docx_uploader/docx_uploader.py b/app/main/reports/docx_uploader/docx_uploader.py index 18d901b1..ac30dee4 100644 --- a/app/main/reports/docx_uploader/docx_uploader.py +++ b/app/main/reports/docx_uploader/docx_uploader.py @@ -22,6 +22,7 @@ def __init__(self): self.file = None self.special_paragraph_indices = {} self.headers_page = 0 + self.page_count = 0 def upload(self, file, pdf_filepath=''): self.file = docx.Document(file) From 63e68482cdd2f6b1ea039ffc165cbb5ff9fe8fb5 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 24 Apr 2024 15:42:27 +0300 Subject: [PATCH 28/28] update ReportTemplateNameCheck feedback --- app/main/checks/report_checks/template_name.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main/checks/report_checks/template_name.py b/app/main/checks/report_checks/template_name.py index 4e3991eb..24c245fd 100644 --- a/app/main/checks/report_checks/template_name.py +++ b/app/main/checks/report_checks/template_name.py @@ -22,4 +22,4 @@ def check(self): return answer(True, "Пройдена!") else: return answer(False, - f'Название файла презентации "{self.filename}" не соответствует шаблону (Пример: {CUR_YEAR}030301ИВАНОВ)') + f'Название файла презентации "{self.filename}" не соответствует шаблону (Пример: {CUR_YEAR}ВКР030301ИВАНОВ)')