From edf53032663b2f91b8e0aa71543a823eb1dd62fa Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_viro@mail.ru>
Date: Thu, 29 Jun 2023 14:41:47 +0300
Subject: [PATCH 01/28] add theme-mention checks for presentations and reports

---
 app/main/check_packs/pack_config.py           |   6 +-
 app/main/checks/__init__.py                   |   7 +-
 .../checks/presentation_checks/__init__.py    |   1 +
 .../presentation_checks/find_def_sld.py       |   9 +-
 .../presentation_checks/find_theme_in_pres.py |  57 +++++++
 app/main/checks/report_checks/__init__.py     |   1 +
 .../report_checks/find_theme_in_report.py     | 150 ++++++++++++++++++
 7 files changed, 223 insertions(+), 8 deletions(-)
 create mode 100644 app/main/checks/presentation_checks/find_theme_in_pres.py
 create mode 100644 app/main/checks/report_checks/find_theme_in_report.py

diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py
index cf0b85b2..e5f0c93b 100644
--- a/app/main/check_packs/pack_config.py
+++ b/app/main/check_packs/pack_config.py
@@ -14,7 +14,8 @@
     ['pres_right_words'],
     ['pres_image_share'],
     ['future_dev'],
-    ['pres_banned_words_check']
+    ['pres_banned_words_check'],
+    ['theme_in_pres_check'],
 ]
 BASE_REPORT_CRITERION = [
     ["simple_check"],
@@ -37,7 +38,8 @@
     ["needed_headers_check"],
     ["header_check"],
     ["report_section_component"],
-    ["main_text_check"]
+    ["main_text_check"],
+    ["theme_in_report_check"],
 ]
 
 DEFAULT_TYPE = 'pres'
diff --git a/app/main/checks/__init__.py b/app/main/checks/__init__.py
index 8f65c507..7b7acb68 100644
--- a/app/main/checks/__init__.py
+++ b/app/main/checks/__init__.py
@@ -17,7 +17,8 @@
         PresRightWordsCheck.id: PresRightWordsCheck,
         PresImageShareCheck.id: PresImageShareCheck,
         FurtherDev.id: FurtherDev,
-        PresBannedWordsCheck.id: PresBannedWordsCheck
+        PresBannedWordsCheck.id: PresBannedWordsCheck,
+        FindThemeInPres.id: FindThemeInPres,
     },
     'report': {
         ReportSimpleCheck.id: ReportSimpleCheck,
@@ -38,6 +39,8 @@
         ReportNeededHeadersCheck.id: ReportNeededHeadersCheck,
         ReportChapters.id: ReportChapters,
         ReportSectionComponent.id: ReportSectionComponent,
-        ReportMainTextCheck.id: ReportMainTextCheck
+        ReportMainTextCheck.id: ReportMainTextCheck,
+        FindThemeInReport.id: FindThemeInReport,
+
     }
 }
diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py
index 81ec9cbd..ca032913 100644
--- a/app/main/checks/presentation_checks/__init__.py
+++ b/app/main/checks/presentation_checks/__init__.py
@@ -10,3 +10,4 @@
 from .pres_right_words import PresRightWordsCheck
 from .image_share import PresImageShareCheck
 from .banned_words import PresBannedWordsCheck
+from .find_theme_in_pres import FindThemeInPres
diff --git a/app/main/checks/presentation_checks/find_def_sld.py b/app/main/checks/presentation_checks/find_def_sld.py
index 46c64b0f..33a66ff5 100644
--- a/app/main/checks/presentation_checks/find_def_sld.py
+++ b/app/main/checks/presentation_checks/find_def_sld.py
@@ -8,18 +8,19 @@ class FindDefSld(BasePresCriterion):
     def __init__(self, file_info, key_slide):
         super().__init__(file_info)
         self.type_of_slide = key_slide
+        self.found_idxs = []
 
     def check(self):
-        found_slides, found_idxs = [], []
+        found_slides = []
         for i, title in enumerate(self.file.get_titles(), 1):
             if str(title).lower().find(str(self.type_of_slide).lower()) != -1:
                 found_slides.append(self.file.get_text_from_slides()[i - 1])
-                found_idxs.append(i)
+                self.found_idxs.append(i)
         if len(found_slides) == 0:
             return answer(False, 'Слайд не найден')
         else:
-            found_idxs = self.format_page_link(found_idxs)
-            return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs))))
+            found_idxs_link = self.format_page_link(self.found_idxs)
+            return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs_link))))
 
     @property
     def name(self):
diff --git a/app/main/checks/presentation_checks/find_theme_in_pres.py b/app/main/checks/presentation_checks/find_theme_in_pres.py
new file mode 100644
index 00000000..c55f64b9
--- /dev/null
+++ b/app/main/checks/presentation_checks/find_theme_in_pres.py
@@ -0,0 +1,57 @@
+
+from ..base_check import BasePresCriterion, answer
+from .find_def_sld import FindDefSld
+from app.nlp.stemming import Stemming
+
+import  string
+import nltk
+from nltk.tokenize import word_tokenize, sent_tokenize
+from nltk.corpus import stopwords
+from pymorphy2 import MorphAnalyzer
+
+nltk.download('stopwords')
+MORPH_ANALYZER = MorphAnalyzer()
+
+
+class FindThemeInPres(BasePresCriterion):
+
+    description = "Проверка упоминания темы в презентации"
+    id = 'theme_in_pres_check'
+
+    def __init__(self, file_info):
+        super().__init__(file_info)
+        self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение")
+
+    def check(self):
+
+        stop_words = set(stopwords.words("russian"))
+
+        self.check_conclusion.check()
+        page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs")))
+
+        text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion]
+        theme = ''.join(word for word in text_from_title[0])
+
+        translator = str.maketrans('', '', string.punctuation)
+        theme_without_punct = theme.translate(translator)
+        words_in_theme = word_tokenize(theme_without_punct)
+        # for word in words_in_theme:
+        lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words}
+
+
+        text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1]
+        string_from_text = ''.join(text_from_slide)
+
+        text_without_punct = string_from_text.translate(translator)
+        words_in_text = word_tokenize(text_without_punct)
+
+        lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words}
+
+        intersection = round(len(lemma_theme.intersection(lemma_text))//len(lemma_theme))*100
+
+        if intersection == 0:
+            return answer(False, f"Не пройдена! {intersection}")
+        elif 1 < intersection < 40:
+            return answer(False, f"Обратите внимание! {intersection} %")
+        else:
+            return answer (True, f'Пройдена! {intersection} %')
diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py
index a2fa3819..a8ef3c6b 100644
--- a/app/main/checks/report_checks/__init__.py
+++ b/app/main/checks/report_checks/__init__.py
@@ -17,4 +17,5 @@
 from .short_sections_check import ReportShortSectionsCheck
 from .simple_check import ReportSimpleCheck
 from .style_check_settings import StyleCheckSettings
+from .find_theme_in_report import FindThemeInReport
 
diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py
new file mode 100644
index 00000000..3002704c
--- /dev/null
+++ b/app/main/checks/report_checks/find_theme_in_report.py
@@ -0,0 +1,150 @@
+import re
+import string
+
+from ..base_check import BaseReportCriterion, answer
+# from .find_def_sld import FindDefSld
+# from app.nlp.stemming import Stemming
+from ...reports.pdf_document.pdf_document_manager import PdfDocumentManager
+import pdfplumber
+from ...reports.docx_uploader import DocxUploader
+
+import  string
+import nltk
+from nltk.tokenize import word_tokenize, sent_tokenize
+from nltk.corpus import stopwords
+from pymorphy2 import MorphAnalyzer
+
+nltk.download('stopwords')
+MORPH_ANALYZER = MorphAnalyzer()
+
+
+class FindThemeInReport(BaseReportCriterion):
+
+    description = "Проверка упоминания темы в отчете"
+    id = 'theme_in_report_check'
+
+    def __init__(self, file_info):
+        super().__init__(file_info)
+        self.intro = {}
+        self.chapters = []
+        self.text_par = []
+        self.full_text = set()
+
+    def late_init(self):
+        self.chapters = self.file.make_chapters(self.file_type['report_type'])
+
+    def check(self):
+        stop_words = set(stopwords.words("russian"))
+        if self.file.page_counter() < 4:
+            return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
+
+        self.late_init()
+        for intro in self.chapters:
+            header = intro["text"].lower()
+            if header not in ['заключение', "введение", "список использованных источников", "условные обозначения"]:
+                self.intro = intro
+                for intro_par in self.intro['child']:
+                    par = intro_par['text'].lower()
+                    self.text_par.append(par)
+        lemma_theme = self.find_theme()
+        for i in self.text_par:
+            translator = str.maketrans('', '', string.punctuation)
+            theme_without_punct = i.translate(translator)
+            word_in_text = word_tokenize(theme_without_punct)
+            lemma_text = {MORPH_ANALYZER.parse(w)[0].normal_form for w in word_in_text if w.lower() not in stop_words}
+            self.full_text.update(lemma_text)
+
+        intersection = lemma_theme.intersection(self.full_text)
+        int_pr = round(len(intersection)*100//len(lemma_theme))
+
+        return answer(True, f'{lemma_theme} {intersection} hhh {int_pr}')
+
+
+
+
+
+
+    def find_theme(self):
+        stop_words = set(stopwords.words("russian"))
+        lemma_theme = []
+        for key, text_on_page in self.file.pdf_file.get_text_on_page().items():
+            if key == 1:
+                lower_text = text_on_page.lower()
+                text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation))
+                list_full = text_without_punct.split()
+                start = list_full.index('тема') + 1
+                end = list_full.index('студент')
+                list_theme = list_full[start:end]
+                lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if
+                                word not in stop_words}
+            return lemma_theme
+
+
+
+
+
+        # full_text_pre = self.file.pdf_file.text_on_page
+        # full_text = ''.jo
+        # start_text = full_text.index['1.']
+        # end_text = full_text.index['ЗАКЛЮЧЕНИЕ']
+        # text_for_analys = full_text[start_text:end_text]
+        # lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in text_for_analys if word not in stop_words}
+
+        # for text_on_page in self.file.pdf_file.get_text_on_page().values():
+        #
+        #     lower_text = text_on_page.lower()
+        #     text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation))
+        #     list_full = text_without_punct.split()
+        #     start = list_full.index('тема')
+        #     end = list_full.index('студент')
+        #     list_theme = list_full[start:end]
+        #     lemma_theme = ({MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if
+        #                    word not in stop_words})
+
+
+
+
+
+
+# class FindThemeInReport(BaseReportCriterion):
+#
+#     description = "Проверка упоминания темы в отчете"
+#     id = 'theme_in_report_check'
+#
+#     def __init__(self, file_info):
+#         super().__init__(file_info)
+#         self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение")
+#
+#     def check(self):
+#
+#         stop_words = set(stopwords.words("russian"))
+#
+#         self.check_conclusion.check()
+#         page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs")))
+#
+#         text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion]
+#         theme = ''.join(word for word in text_from_title[0])
+#
+#         translator = str.maketrans('', '', string.punctuation)
+#         theme_without_punct = theme.translate(translator)
+#         words_in_theme = word_tokenize(theme_without_punct)
+#         # for word in words_in_theme:
+#         lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words}
+#
+#
+#         text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1]
+#         string_from_text = ''.join(text_from_slide)
+#
+#         text_without_punct = string_from_text.translate(translator)
+#         words_in_text = word_tokenize(text_without_punct)
+#
+#         lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words}
+#
+#         intersection = round(len(lemma_theme.intersection(lemma_text))//len(lemma_theme))*100
+#
+#         if intersection == 0:
+#             return answer(False, f"Не пройдена! {intersection}")
+#         elif 1 < intersection < 40:
+#             return answer(False, f"Обратите внимание! {intersection} %")
+#         else:
+#             return answer (True, f'Пройдена! {intersection} %')

From d9d6e6ae050ab78d3954adfbd782045a453283e5 Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_viro@mail.ru>
Date: Tue, 4 Jul 2023 12:16:14 +0300
Subject: [PATCH 02/28] fix image share check

---
 .../checks/report_checks/image_share_check.py | 47 +++++++++----------
 .../pdf_document/pdf_document_manager.py      | 27 ++++++++++-
 requirements.txt                              | 13 +++--
 3 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/app/main/checks/report_checks/image_share_check.py b/app/main/checks/report_checks/image_share_check.py
index 1883a7a4..7929816f 100644
--- a/app/main/checks/report_checks/image_share_check.py
+++ b/app/main/checks/report_checks/image_share_check.py
@@ -1,6 +1,5 @@
 from ..base_check import BaseReportCriterion, answer
 
-
 class ReportImageShareCheck(BaseReportCriterion):
     description = "Проверка доли объема отчёта, приходящейся на изображения"
     id = 'image_share_check'
@@ -12,27 +11,25 @@ def __init__(self, file_info, limit=0.3):
     def check(self):
         if self.file.page_counter() < 4:
             return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
-        images_height = 0
-        for image in self.file.inline_shapes:
-            images_height += image.height.cm
-        if len(self.file.file.sections):
-            available_space = self.file.file.sections[0].page_height.cm - self.file.file.sections[0].bottom_margin.cm - \
-                              self.file.file.sections[0].top_margin.cm
-            images_pages = images_height / available_space
-            share = images_pages / self.file.count
-            if share > self.limit:
-                result_str = f'Проверка не пройдена! Изображения в работе занимают около {round(share, 2)} объема ' \
-                             f'документа без учета приложения, ограничение - {round(self.limit, 2)}'
-                result_str += '''
-                            Если доля отчета, приходящаяся на изображения, больше нормы, попробуйте сделать следующее:
-                            <ul>
-                                <li>Попробуйте перенести малозначимые иллюстрации в Приложение;</li>
-                                <li>Если у вас уже есть раздел Приложение, убедитесь, что количество страниц в отчете посчитано программой без учета приложения;</li>
-                                <li>Если страницы посчитаны программой неверно, убедитесь, что заголовок приложения правильно оформлен;</li>
-                                <li>Убедитесь, что красная строка не сделана с помощью пробелов или табуляции.</li>
-                            </ul>
-                            '''
-                return answer(False, result_str)
-            else:
-                return answer(True, f'Пройдена!')
-        return answer(False, 'Во время обработки произошла критическая ошибка')
+        images_height = self.file.pdf_file.page_images()
+        available_space = self.file.pdf_file.page_height()
+
+        images_value = images_height/available_space
+
+        if images_value > self.limit:
+            result_str = f'Проверка не пройдена! Изображения в работе занимают около {round(images_value, 2)} объема ' \
+                         f'документа без учета приложения, ограничение - {round(self.limit, 2)}'
+            result_str += '''
+                        Если доля отчета, приходящаяся на изображения, больше нормы, попробуйте сделать следующее:
+                        <ul>
+                            <li>Попробуйте перенести малозначимые иллюстрации в Приложение;</li>
+                            <li>Если у вас уже есть раздел Приложение, убедитесь, что количество страниц в отчете посчитано программой без учета приложения;</li>
+                            <li>Если страницы посчитаны программой неверно, убедитесь, что заголовок приложения правильно оформлен;</li>
+                            <li>Убедитесь, что красная строка не сделана с помощью пробелов или табуляции.</li>
+                        </ul>
+                        '''
+            return answer(False, result_str)
+        else:
+            return answer(True, f'Пройдена!')
+
+        # return answer(False, f'Во время обработки произошла критическая ошибка')
diff --git a/app/main/reports/pdf_document/pdf_document_manager.py b/app/main/reports/pdf_document/pdf_document_manager.py
index ddc125e0..d5679653 100644
--- a/app/main/reports/pdf_document/pdf_document_manager.py
+++ b/app/main/reports/pdf_document/pdf_document_manager.py
@@ -1,14 +1,18 @@
+
 import pdfplumber
+import fitz
 
-from app.utils import convert_to
 
+from app.utils import convert_to
 
 class PdfDocumentManager:
     def __init__(self, path_to_file, pdf_filepath=''):
         if not pdf_filepath:
             self.pdf_file = pdfplumber.open(convert_to(path_to_file, target_format='pdf'))
+            self.pdf_fitz = fitz.open(convert_to(path_to_file, target_format='pdf'))
         else:
             self.pdf_file = pdfplumber.open(pdf_filepath)
+            self.pdf_fitz = fitz.open(pdf_filepath)
         self.pages = self.pdf_file.pages
         self.page_count = len(self.pages)
         self.text_on_page = self.get_text_on_page()
@@ -18,6 +22,27 @@ def __init__(self, path_to_file, pdf_filepath=''):
     def get_text_on_page(self):
         return {page + 1: self.pages[page].extract_text() for page in range(self.page_count)}
 
+    def page_images(self):
+        total_height = 0
+        for page_num in range(self.page_count):
+            page = self.pdf_fitz[page_num]
+            images = self.pdf_fitz.get_page_images(page)
+            for image in images:
+                image_coord = page.get_image_bbox(image[7], transform=0)
+                total_height += (image_coord[3] - image_coord[1])
+
+        return total_height
+
+    def page_height(self):
+        page = self.pdf_fitz[0]   # get first page as a sample
+        page_rect = page.rect
+        height = page_rect.height
+        top_margin = page_rect.y0
+        bottom_margin = height - page_rect.y1
+        available_space = (height - top_margin - bottom_margin)*self.page_count
+
+        return available_space
+
     # def get_only_text_on_page(self):
     #     if not self.only_text_on_page:
     #         only_text_on_page = {}
diff --git a/requirements.txt b/requirements.txt
index 9acf11fc..afe3a243 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,13 @@
 werkzeug==2.0.0
 Flask==2.0.3
 jinja2==3.0.0
-requests==2.24.0
+requests~=2.31.0
 python-pptx==0.6.18
 odfpy==1.4.1
 pymongo==3.11.1
 flask-login==0.5.0
 numpy==1.22
-scipy==1.7.1
+scipy~=1.10.1
 pymorphy2==0.9.1
 nltk==3.6.6
 flask-recaptcha==0.4.2
@@ -16,7 +16,7 @@ flask-security==3.0.0
 celery==5.2.2
 flower==1.2.0
 redis==3.5.3
-pandas==1.3.4
+pandas~=2.0.3
 fsspec==2022.2.0
 python-docx==0.8.11
 odfpy==1.4.1
@@ -25,3 +25,10 @@ docx2python~=2.0.4
 oauthlib~=3.1.0
 pdfplumber==0.6.1
 pytest~=7.1.2
+
+PyMuPDF~=1.22.5
+PyPDF2~=3.0.1
+
+configparser~=5.3.0
+pytz~=2023.3
+urllib3~=2.0.3
\ No newline at end of file

From b837d60431473df97267cf9c6450630f6f86c6ba Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_viro@mail.ru>
Date: Tue, 4 Jul 2023 18:35:39 +0300
Subject: [PATCH 03/28] theme checks: add limit threshold and pass/fail messages

---
 .../presentation_checks/find_theme_in_pres.py |  19 ++--
 .../report_checks/find_theme_in_report.py     | 100 +++---------------
 2 files changed, 23 insertions(+), 96 deletions(-)

diff --git a/app/main/checks/presentation_checks/find_theme_in_pres.py b/app/main/checks/presentation_checks/find_theme_in_pres.py
index c55f64b9..a0dd583c 100644
--- a/app/main/checks/presentation_checks/find_theme_in_pres.py
+++ b/app/main/checks/presentation_checks/find_theme_in_pres.py
@@ -3,7 +3,7 @@
 from .find_def_sld import FindDefSld
 from app.nlp.stemming import Stemming
 
-import  string
+import string
 import nltk
 from nltk.tokenize import word_tokenize, sent_tokenize
 from nltk.corpus import stopwords
@@ -18,9 +18,10 @@ class FindThemeInPres(BasePresCriterion):
     description = "Проверка упоминания темы в презентации"
     id = 'theme_in_pres_check'
 
-    def __init__(self, file_info):
+    def __init__(self, file_info, limit = 40):
         super().__init__(file_info)
         self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение")
+        self.limit = limit
 
     def check(self):
 
@@ -35,7 +36,6 @@ def check(self):
         translator = str.maketrans('', '', string.punctuation)
         theme_without_punct = theme.translate(translator)
         words_in_theme = word_tokenize(theme_without_punct)
-        # for word in words_in_theme:
         lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words}
 
 
@@ -47,11 +47,12 @@ def check(self):
 
         lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words}
 
-        intersection = round(len(lemma_theme.intersection(lemma_text))//len(lemma_theme))*100
+        value_intersection = round(len(lemma_theme.intersection(lemma_text))*100//len(lemma_theme))
 
-        if intersection == 0:
-            return answer(False, f"Не пройдена! {intersection}")
-        elif 1 < intersection < 40:
-            return answer(False, f"Обратите внимание! {intersection} %")
+        if value_intersection == 0:
+            return answer(False, f"Не пройдена! В презентации не упоминаются слова, завяленные в теме.")
+        elif 1 < value_intersection < self.limit:
+            return answer(False,
+                          f"Не пройдена! Процент упоминания темы в вашей презентации ({value_intersection} %) ниже требуемого ({self.limit} %).")
         else:
-            return answer (True, f'Пройдена! {intersection} %')
+            return answer(True, f'Пройдена! Процент упоминания темы в презентации: {value_intersection} %')
diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py
index 3002704c..e19ba449 100644
--- a/app/main/checks/report_checks/find_theme_in_report.py
+++ b/app/main/checks/report_checks/find_theme_in_report.py
@@ -2,11 +2,6 @@
 import string
 
 from ..base_check import BaseReportCriterion, answer
-# from .find_def_sld import FindDefSld
-# from app.nlp.stemming import Stemming
-from ...reports.pdf_document.pdf_document_manager import PdfDocumentManager
-import pdfplumber
-from ...reports.docx_uploader import DocxUploader
 
 import  string
 import nltk
@@ -23,12 +18,13 @@ class FindThemeInReport(BaseReportCriterion):
     description = "Проверка упоминания темы в отчете"
     id = 'theme_in_report_check'
 
-    def __init__(self, file_info):
+    def __init__(self, file_info, limit = 40):
         super().__init__(file_info)
         self.intro = {}
         self.chapters = []
         self.text_par = []
         self.full_text = set()
+        self.limit = limit
 
     def late_init(self):
         self.chapters = self.file.make_chapters(self.file_type['report_type'])
@@ -47,22 +43,22 @@ def check(self):
                     par = intro_par['text'].lower()
                     self.text_par.append(par)
         lemma_theme = self.find_theme()
-        for i in self.text_par:
+
+        for text in self.text_par:
             translator = str.maketrans('', '', string.punctuation)
-            theme_without_punct = i.translate(translator)
+            theme_without_punct = text.translate(translator)
             word_in_text = word_tokenize(theme_without_punct)
             lemma_text = {MORPH_ANALYZER.parse(w)[0].normal_form for w in word_in_text if w.lower() not in stop_words}
             self.full_text.update(lemma_text)
 
         intersection = lemma_theme.intersection(self.full_text)
-        int_pr = round(len(intersection)*100//len(lemma_theme))
-
-        return answer(True, f'{lemma_theme} {intersection} hhh {int_pr}')
-
-
-
-
-
+        value_intersection = round(len(intersection)*100//len(lemma_theme))
+        if value_intersection == 0:
+            return answer(False, f"Не пройдена! В отчете не упоминаются слова, завяленные в теме отчета.")
+        elif 1 < value_intersection < self.limit:
+            return answer(False, f"Не пройдена! Процент упоминания темы в вашем отчете ({value_intersection} %) ниже требуемого ({self.limit} %).")
+        else:
+            return answer (True, f'Пройдена! Процент упоминания темы в ответе: {value_intersection} %.')
 
     def find_theme(self):
         stop_words = set(stopwords.words("russian"))
@@ -77,74 +73,4 @@ def find_theme(self):
                 list_theme = list_full[start:end]
                 lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if
                                 word not in stop_words}
-            return lemma_theme
-
-
-
-
-
-        # full_text_pre = self.file.pdf_file.text_on_page
-        # full_text = ''.jo
-        # start_text = full_text.index['1.']
-        # end_text = full_text.index['ЗАКЛЮЧЕНИЕ']
-        # text_for_analys = full_text[start_text:end_text]
-        # lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in text_for_analys if word not in stop_words}
-
-        # for text_on_page in self.file.pdf_file.get_text_on_page().values():
-        #
-        #     lower_text = text_on_page.lower()
-        #     text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation))
-        #     list_full = text_without_punct.split()
-        #     start = list_full.index('тема')
-        #     end = list_full.index('студент')
-        #     list_theme = list_full[start:end]
-        #     lemma_theme = ({MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if
-        #                    word not in stop_words})
-
-
-
-
-
-
-# class FindThemeInReport(BaseReportCriterion):
-#
-#     description = "Проверка упоминания темы в отчете"
-#     id = 'theme_in_report_check'
-#
-#     def __init__(self, file_info):
-#         super().__init__(file_info)
-#         self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение")
-#
-#     def check(self):
-#
-#         stop_words = set(stopwords.words("russian"))
-#
-#         self.check_conclusion.check()
-#         page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs")))
-#
-#         text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion]
-#         theme = ''.join(word for word in text_from_title[0])
-#
-#         translator = str.maketrans('', '', string.punctuation)
-#         theme_without_punct = theme.translate(translator)
-#         words_in_theme = word_tokenize(theme_without_punct)
-#         # for word in words_in_theme:
-#         lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words}
-#
-#
-#         text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1]
-#         string_from_text = ''.join(text_from_slide)
-#
-#         text_without_punct = string_from_text.translate(translator)
-#         words_in_text = word_tokenize(text_without_punct)
-#
-#         lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words}
-#
-#         intersection = round(len(lemma_theme.intersection(lemma_text))//len(lemma_theme))*100
-#
-#         if intersection == 0:
-#             return answer(False, f"Не пройдена! {intersection}")
-#         elif 1 < intersection < 40:
-#             return answer(False, f"Обратите внимание! {intersection} %")
-#         else:
-#             return answer (True, f'Пройдена! {intersection} %')
+            return lemma_theme
\ No newline at end of file

From 116c5e803d2c30e582c2e8d98716b5445f97f452 Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_viro@mail.ru>
Date: Thu, 13 Jul 2023 18:20:22 +0300
Subject: [PATCH 04/28] optimize theme check by reusing conclusion slide index (found_index)

---
 .../checks/presentation_checks/find_def_sld.py    |  4 ++++
 .../presentation_checks/find_theme_in_pres.py     | 15 ++++++++-------
 app/main/presentations/odp/presentation_odp.py    |  1 +
 app/main/presentations/pptx/presentation_pptx.py  |  2 ++
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/app/main/checks/presentation_checks/find_def_sld.py b/app/main/checks/presentation_checks/find_def_sld.py
index 33a66ff5..fbb68ad8 100644
--- a/app/main/checks/presentation_checks/find_def_sld.py
+++ b/app/main/checks/presentation_checks/find_def_sld.py
@@ -19,6 +19,10 @@ def check(self):
         if len(found_slides) == 0:
             return answer(False, 'Слайд не найден')
         else:
+            if self.type_of_slide == 'Заключение':
+                self.file.found_index['Заключение'] = ''.join(str(item) for item in self.found_idxs)
+            else:
+                self.file.found_index['Заключение'] = None
             found_idxs_link = self.format_page_link(self.found_idxs)
             return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs_link))))
 
diff --git a/app/main/checks/presentation_checks/find_theme_in_pres.py b/app/main/checks/presentation_checks/find_theme_in_pres.py
index a0dd583c..51518946 100644
--- a/app/main/checks/presentation_checks/find_theme_in_pres.py
+++ b/app/main/checks/presentation_checks/find_theme_in_pres.py
@@ -18,27 +18,28 @@ class FindThemeInPres(BasePresCriterion):
     description = "Проверка упоминания темы в презентации"
     id = 'theme_in_pres_check'
 
-    def __init__(self, file_info, limit = 40):
+    def __init__(self, file_info, limit = 60):
         super().__init__(file_info)
-        self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение")
+        # self.check_conclusion = FindDefSld(file_info=file_info, key_slide="Заключение")
         self.limit = limit
 
     def check(self):
 
         stop_words = set(stopwords.words("russian"))
+        if self.file.found_index['Заключение'] is not None:
+            page_conclusion = self.file.found_index['Заключение']
 
-        self.check_conclusion.check()
-        page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs")))
+        # self.check_conclusion.check()
+        # page_conclusion = ''.join((str(item) for item in self.check_conclusion.__getattribute__("found_idxs")))
 
-        text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion]
-        theme = ''.join(word for word in text_from_title[0])
+            text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if str(page) != page_conclusion]
+            theme = ''.join(word for word in text_from_title[0])
 
         translator = str.maketrans('', '', string.punctuation)
         theme_without_punct = theme.translate(translator)
         words_in_theme = word_tokenize(theme_without_punct)
         lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words}
 
-
         text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1]
         string_from_text = ''.join(text_from_slide)
 
diff --git a/app/main/presentations/odp/presentation_odp.py b/app/main/presentations/odp/presentation_odp.py
index bb3a66d5..90346696 100644
--- a/app/main/presentations/odp/presentation_odp.py
+++ b/app/main/presentations/odp/presentation_odp.py
@@ -12,6 +12,7 @@ def __init__(self, presentation_name):
         self.prs = opendocument.load(presentation_name)
         self.parse_styles()
         self.add_slides()
+        self.found_index = {}
 
     def add_slides(self):
         for slide in self.prs.getElementsByType(draw.Page):
diff --git a/app/main/presentations/pptx/presentation_pptx.py b/app/main/presentations/pptx/presentation_pptx.py
index 869846e0..b01d0b40 100644
--- a/app/main/presentations/pptx/presentation_pptx.py
+++ b/app/main/presentations/pptx/presentation_pptx.py
@@ -9,6 +9,8 @@ def __init__(self, presentation_name):
         PresentationBasic.__init__(self, presentation_name)
         self.prs = Presentation(presentation_name)
         self.add_slides()
+        self.found_index = {}
+
 
     def add_slides(self):
         for index, slide in enumerate(self.prs.slides, 1):

From 5523f0a4b12941e66d830e1c9f054ac27955b643 Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_viro@mail.ru>
Date: Tue, 1 Aug 2023 16:48:48 +0300
Subject: [PATCH 05/28] fix page_count (without pril)

---
 .../headers_at_page_top_check.py              |  2 +-
 .../checks/report_checks/image_share_check.py |  4 +-
 .../report_checks/literature_references.py    |  2 +-
 .../pdf_document/pdf_document_manager.py      | 38 ++++++++++---------
 4 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/app/main/checks/report_checks/headers_at_page_top_check.py b/app/main/checks/report_checks/headers_at_page_top_check.py
index 4be9ad21..1fb728ae 100644
--- a/app/main/checks/report_checks/headers_at_page_top_check.py
+++ b/app/main/checks/report_checks/headers_at_page_top_check.py
@@ -25,7 +25,7 @@ def check(self):
         if self.file_type["report_type"] == 'LR':
             for header in self.headers:
                 found = False
-                for page_num in range(1, self.pdf.page_count):
+                for page_num in range(1, self.pdf.page_count_all):
                     lines = self.pdf.text_on_page[page_num + 1].split("\n")
                     last_header_line = 0
                     collected_text = ""
diff --git a/app/main/checks/report_checks/image_share_check.py b/app/main/checks/report_checks/image_share_check.py
index 7929816f..2f68b39a 100644
--- a/app/main/checks/report_checks/image_share_check.py
+++ b/app/main/checks/report_checks/image_share_check.py
@@ -11,8 +11,8 @@ def __init__(self, file_info, limit=0.3):
     def check(self):
         if self.file.page_counter() < 4:
             return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
-        images_height = self.file.pdf_file.page_images()
-        available_space = self.file.pdf_file.page_height()
+        images_height = self.file.pdf_file.page_images(page_without_pril=self.file.count)
+        available_space = self.file.pdf_file.page_height(page_without_pril=self.file.count)
 
         images_value = images_height/available_space
 
diff --git a/app/main/checks/report_checks/literature_references.py b/app/main/checks/report_checks/literature_references.py
index b4ed3335..8ecf1c1d 100644
--- a/app/main/checks/report_checks/literature_references.py
+++ b/app/main/checks/report_checks/literature_references.py
@@ -129,7 +129,7 @@ def count_sources(self):
 
     def search_literature_start_pdf(self):
         start_page = 0
-        end_page = self.file.pdf_file.page_count
+        end_page = self.file.pdf_file.page_count_all
         for i in self.file.pdf_file.text_on_page.keys():
             lowercase_str = self.file.pdf_file.text_on_page[i].lower()
             if re.search(self.name_pattern, lowercase_str):
diff --git a/app/main/reports/pdf_document/pdf_document_manager.py b/app/main/reports/pdf_document/pdf_document_manager.py
index d5679653..45c952d5 100644
--- a/app/main/reports/pdf_document/pdf_document_manager.py
+++ b/app/main/reports/pdf_document/pdf_document_manager.py
@@ -1,5 +1,5 @@
 
-import pdfplumber
+# import pdfplumber
 import fitz
 
 
@@ -8,38 +8,42 @@
 class PdfDocumentManager:
     def __init__(self, path_to_file, pdf_filepath=''):
         if not pdf_filepath:
-            self.pdf_file = pdfplumber.open(convert_to(path_to_file, target_format='pdf'))
-            self.pdf_fitz = fitz.open(convert_to(path_to_file, target_format='pdf'))
+            # self.pdf_file = pdfplumber.open(convert_to(path_to_file, target_format='pdf'))
+            self.pdf_file = fitz.open(convert_to(path_to_file, target_format='pdf'))
         else:
-            self.pdf_file = pdfplumber.open(pdf_filepath)
-            self.pdf_fitz = fitz.open(pdf_filepath)
-        self.pages = self.pdf_file.pages
-        self.page_count = len(self.pages)
+            # self.pdf_file = pdfplumber.open(pdf_filepath)
+            self.pdf_file = fitz.open(pdf_filepath)
+        self.pages = [self.pdf_file.load_page(page_num) for page_num in range(self.pdf_file.page_count)]
+        self.page_count_all = self.pdf_file.page_count
+        # self.page_count = len(self.pages)
+        # self.pages = self.pdf_file.pages
         self.text_on_page = self.get_text_on_page()
         # self.bboxes = []
         # self.only_text_on_page = {}
 
     def get_text_on_page(self):
-        return {page + 1: self.pages[page].extract_text() for page in range(self.page_count)}
+        return {page_num + 1: page.get_text() for page_num, page in enumerate(self.pages)}
 
-    def page_images(self):
+    # def get_text_on_page(self):
+    #     return {page + 1: self.pages[page].extract_text() for page in range(self.page_count_all)}
+
+    def page_images(self, page_without_pril):
         total_height = 0
-        for page_num in range(self.page_count):
-            page = self.pdf_fitz[page_num]
-            images = self.pdf_fitz.get_page_images(page)
+        for page_num in range(page_without_pril):
+            page = self.pdf_file[page_num]
+            images = self.pdf_file.get_page_images(page)
             for image in images:
                 image_coord = page.get_image_bbox(image[7], transform=0)
                 total_height += (image_coord[3] - image_coord[1])
 
         return total_height
 
-    def page_height(self):
-        page = self.pdf_fitz[0]   # get first page as a sample
+    def page_height(self, page_without_pril):
+        page = self.pdf_file[0]   # get first page as a sample
         page_rect = page.rect
-        height = page_rect.height
-        top_margin = page_rect.y0
+        height, top_margin = page_rect.height, page_rect.y0
         bottom_margin = height - page_rect.y1
-        available_space = (height - top_margin - bottom_margin)*self.page_count
+        available_space = (height - top_margin - bottom_margin)*page_without_pril
 
         return available_space
 

From 488e8cd65d8bb134a7dffdf7ce63bd7cb74258cd Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_viro@mail.ru>
Date: Wed, 8 Nov 2023 12:29:27 +0300
Subject: [PATCH 06/28] fix conflicts

---
 requirements.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index afe3a243..46b14691 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,10 +25,8 @@ docx2python~=2.0.4
 oauthlib~=3.1.0
 pdfplumber==0.6.1
 pytest~=7.1.2
-
 PyMuPDF~=1.22.5
 PyPDF2~=3.0.1
-
 configparser~=5.3.0
 pytz~=2023.3
 urllib3~=2.0.3
\ No newline at end of file

From 9ff54aabb0dfbf9cfc9cbe40a1faec72bdbc56ea Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_viro@mail.ru>
Date: Wed, 8 Nov 2023 13:03:27 +0300
Subject: [PATCH 07/28] fix mistakes

---
 app/main/check_packs/pack_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py
index df7ef82c..b7456f0b 100644
--- a/app/main/check_packs/pack_config.py
+++ b/app/main/check_packs/pack_config.py
@@ -15,7 +15,7 @@
     ['pres_image_share'],
     ['future_dev'],
     ['pres_banned_words_check'],
-    ['pres_empty_slide'],,
+    ['pres_empty_slide'],
     ['theme_in_pres_check'],
 ]
 BASE_REPORT_CRITERION = [

From d5b081def70ae21192e4ccaef9dc7b04550305a4 Mon Sep 17 00:00:00 2001
From: Anton Toropygin <antontoropygin4@gmail.com>
Date: Thu, 23 Nov 2023 15:32:22 +0300
Subject: [PATCH 08/28] max_abstract_init

---
 app/main/check_packs/pack_config.py                  |  3 ++-
 app/main/checks/__init__.py                          |  3 ++-
 app/main/checks/report_checks/__init__.py            |  2 +-
 .../checks/report_checks/max_abstract_size_check.py  | 12 ++++++++++++
 4 files changed, 17 insertions(+), 3 deletions(-)
 create mode 100644 app/main/checks/report_checks/max_abstract_size_check.py

diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py
index 0639689b..46ba2693 100644
--- a/app/main/check_packs/pack_config.py
+++ b/app/main/check_packs/pack_config.py
@@ -39,7 +39,8 @@
     ["header_check"],
     ["report_section_component"],
     ["main_text_check"],
-    ["spelling_check"]
+    ["spelling_check"],
+    ["max_abstract_size_check"],
 ]
 
 DEFAULT_TYPE = 'pres'
diff --git a/app/main/checks/__init__.py b/app/main/checks/__init__.py
index 8e643b62..d6ad8daa 100644
--- a/app/main/checks/__init__.py
+++ b/app/main/checks/__init__.py
@@ -37,6 +37,7 @@
         ReportChapters.id: ReportChapters,
         ReportSectionComponent.id: ReportSectionComponent,
         ReportMainTextCheck.id: ReportMainTextCheck,
-        SpellingCheck.id: SpellingCheck
+        SpellingCheck.id: SpellingCheck,
+        ReportMaxSizeOfAbstractCheck.id: ReportMaxSizeOfAbstractCheck,
     }
 }
diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py
index 61a4f8a2..114d8703 100644
--- a/app/main/checks/report_checks/__init__.py
+++ b/app/main/checks/report_checks/__init__.py
@@ -21,4 +21,4 @@
 from .sections_check import LRReportSectionCheck
 from .style_check import ReportStyleCheck
 from .spelling_check import SpellingCheck
-
+from .max_abstract_size_check import ReportMaxSizeOfAbstractCheck
diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py
new file mode 100644
index 00000000..47f6b07a
--- /dev/null
+++ b/app/main/checks/report_checks/max_abstract_size_check.py
@@ -0,0 +1,12 @@
+from app.main.checks.base_check import BaseReportCriterion
+
+
+class ReportMaxSizeOfAbstractCheck(BaseReportCriterion):
+    description = "Максимальный размер раздела Реферат в ВКР"
+    id = "max_abstract_size_check"
+
+    def __init__(self, file_info):
+        super().__init__(file_info)
+
+    def check(self):
+        return "123123"
\ No newline at end of file

From c834768d8f44452aa7579e1ef76da1fe97015da2 Mon Sep 17 00:00:00 2001
From: Anton Toropygin <antontoropygin4@gmail.com>
Date: Fri, 24 Nov 2023 18:04:19 +0300
Subject: [PATCH 09/28] fix-import

---
 app/main/checks/report_checks/max_abstract_size_check.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py
index 47f6b07a..6e62f27f 100644
--- a/app/main/checks/report_checks/max_abstract_size_check.py
+++ b/app/main/checks/report_checks/max_abstract_size_check.py
@@ -1,4 +1,4 @@
-from app.main.checks.base_check import BaseReportCriterion
+from ..base_check import BaseReportCriterion, answer
 
 
 class ReportMaxSizeOfAbstractCheck(BaseReportCriterion):
@@ -9,4 +9,4 @@ def __init__(self, file_info):
         super().__init__(file_info)
 
     def check(self):
-        return "123123"
\ No newline at end of file
+        return answer(True, "123123")
\ No newline at end of file

From 143b5becaf2192ad667f58cf7dd761f00e5edc7a Mon Sep 17 00:00:00 2001
From: Anton Toropygin <antontoropygin4@gmail.com>
Date: Mon, 27 Nov 2023 00:24:02 +0300
Subject: [PATCH 10/28] max-size-done

---
 .../report_checks/max_abstract_size_check.py  | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py
index 6e62f27f..d6195565 100644
--- a/app/main/checks/report_checks/max_abstract_size_check.py
+++ b/app/main/checks/report_checks/max_abstract_size_check.py
@@ -7,6 +7,32 @@ class ReportMaxSizeOfAbstractCheck(BaseReportCriterion):
 
     def __init__(self, file_info):
         super().__init__(file_info)
+        self.headers = []
+        self.max_size = 0
+
+    def late_init(self):
+        self.headers = self.file.make_headers(self.file_type['report_type'])
+        self.max_size = 1
 
     def check(self):
-        return answer(True, "123123")
\ No newline at end of file
+        self.late_init()
+        referat_page = 0
+        abstract_page = 0
+        main_page = 0
+        for header in self.headers:
+            if header["name"] == "Реферат":
+                referat_page = header["page"]
+            if header["name"] == "Abstract":
+                abstract_page = header["page"]
+            if header["name"] == "Содержание":
+                main_page = header["page"]
+        referat_size = abstract_page - referat_page
+        abstract_size = main_page - abstract_page
+        if referat_size > self.max_size:
+            return answer(False,
+                          f"<br><br>Размер раздела \"Реферат\" равен {referat_size} страницы, должен быть {self.max_size}")
+        if abstract_size > self.max_size:
+            return answer(False,
+                          f"<br><br>Размер раздела \"Abstract\" равен {abstract_size} страницы, должен быть {self.max_size}")
+        return answer(True,
+                      f"<br><br>Размеры разделов \"Реферат\" и \"Abstract\" соответствуют шаблону")

From f9a8dd62dd7ee2625461d8dcd57cfed46ac70c26 Mon Sep 17 00:00:00 2001
From: Anton Toropygin <antontoropygin4@gmail.com>
Date: Mon, 4 Dec 2023 22:35:05 +0300
Subject: [PATCH 11/28] add-check-for-both

---
 .../report_checks/max_abstract_size_check.py  | 23 +++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py
index d6195565..fbeaf8bb 100644
--- a/app/main/checks/report_checks/max_abstract_size_check.py
+++ b/app/main/checks/report_checks/max_abstract_size_check.py
@@ -8,14 +8,13 @@ class ReportMaxSizeOfAbstractCheck(BaseReportCriterion):
     def __init__(self, file_info):
         super().__init__(file_info)
         self.headers = []
+        self.referat_size = 0
+        self.abstract_size = 0
         self.max_size = 0
 
     def late_init(self):
         self.headers = self.file.make_headers(self.file_type['report_type'])
         self.max_size = 1
-
-    def check(self):
-        self.late_init()
         referat_page = 0
         abstract_page = 0
         main_page = 0
@@ -26,13 +25,19 @@ def check(self):
                 abstract_page = header["page"]
             if header["name"] == "Содержание":
                 main_page = header["page"]
-        referat_size = abstract_page - referat_page
-        abstract_size = main_page - abstract_page
-        if referat_size > self.max_size:
+        self.referat_size = abstract_page - referat_page
+        self.abstract_size = main_page - abstract_page
+
+    def check(self):
+        self.late_init()
+        if self.referat_size > self.max_size and self.abstract_size > self.max_size:
+            return answer(False,
+                          f"<br><br>Размеры разделов \"Реферат\" и \"Abstract\" превышает максимальный размер")
+        if self.referat_size > self.max_size:
             return answer(False,
-                          f"<br><br>Размер раздела \"Реферат\" равен {referat_size} страницы, должен быть {self.max_size}")
-        if abstract_size > self.max_size:
+                          f"<br><br>Размер раздела \"Реферат\" равен {self.referat_size} страницы, должен быть {self.max_size}")
+        if self.abstract_size > self.max_size:
             return answer(False,
-                          f"<br><br>Размер раздела \"Abstract\" равен {abstract_size} страницы, должен быть {self.max_size}")
+                          f"<br><br>Размер раздела \"Abstract\" равен {self.abstract_size} страницы, должен быть {self.max_size}")
         return answer(True,
                       f"<br><br>Размеры разделов \"Реферат\" и \"Abstract\" соответствуют шаблону")

From 32d89bcbb51338f14e84d603dc275371bece0a05 Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_virgo@mail.ru>
Date: Thu, 18 Apr 2024 21:11:35 +0300
Subject: [PATCH 12/28] base for reload /results

---
 app/server.py              |  8 ++++++--
 app/templates/results.html |  3 +++
 assets/scripts/results.js  | 42 +++++++++++++++++++++++++++++++++-----
 3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/app/server.py b/app/server.py
index 20434a16..b4b611dc 100644
--- a/app/server.py
+++ b/app/server.py
@@ -265,10 +265,14 @@ def recheck(check_id):
 @login_required
 def get_status(task_id):
     task_result = AsyncResult(task_id)
+    task = ObjectId(task_id)
+    check = db_methods.get_check(task)
+    complete_task = check.is_ended
     result = {
         "task_id": task_id,
         "task_status": task_result.status,
-        "task_result": task_result.result
+        "task_result": task_result.result,
+        "complete_task": complete_task
     }
     return jsonify(result), 200
 
@@ -285,7 +289,7 @@ def results(_id):
         avg_process_time = None if check.is_ended else db_methods.get_average_processing_time()
         return render_template("./results.html", navi_upload=True, results=check,
                                columns=TABLE_COLUMNS, avg_process_time=avg_process_time,
-                               stats=format_check(check.pack()))
+                               stats=format_check(check.pack()), task_id = _id)
     else:
         logger.info("Запрошенная проверка не найдена: " + _id)
         return render_template("./404.html")
diff --git a/app/templates/results.html b/app/templates/results.html
index 548ea233..11bcf03a 100644
--- a/app/templates/results.html
+++ b/app/templates/results.html
@@ -6,6 +6,9 @@
 {% block title %}Результаты проверки{% endblock %}
 
 {% block main %}
+<script>
+    const task_id = "{{ task_id }}";
+</script>
 
     <div class="header row">{% include "header.html" %}</div>
     <div class="holder row" id="results_holder">
diff --git a/assets/scripts/results.js b/assets/scripts/results.js
index 1de5eab7..7dd16833 100644
--- a/assets/scripts/results.js
+++ b/assets/scripts/results.js
@@ -17,7 +17,7 @@ const renderPage = num => {
     pageIsRendering = true;
 
     pdfDoc.getPage(num).then(page => {
-        const viewport = page.getViewport({scale});
+        const viewport = page.getViewport({ scale });
         canvas.height = viewport.height;
         canvas.width = viewport.width;
 
@@ -89,14 +89,46 @@ if ($("#pdf_download").length !== 0) {
     pdfjsLib
         .getDocument(href)
         .promise.then(pdfDoc_ => {
-        pdfDoc = pdfDoc_;
+            pdfDoc = pdfDoc_;
 
-        $('#page-count')[0].textContent = pdfDoc.numPages;
-        renderPage(pageNum);
-    });
+            $('#page-count')[0].textContent = pdfDoc.numPages;
+            renderPage(pageNum);
+        });
 
     $('#prev-page').click(showPrevPage);
     $('#next-page').click(showNextPage);
 }
 
 $('#showAllVerdicts').click(toggleAllVerdicts);
+
+
+// function for automatic reload page after checking:
+var reloaded = true
+
+function checkStatus() {
+    const intervalId = setInterval(() => {
+        var request = new XMLHttpRequest();
+        request.open('GET', '/tasks/' + task_id, true);
+        request.onreadystatechange = function () {
+            if (request.readyState === XMLHttpRequest.DONE) {
+                if (request.status === 200) {
+                    var response = JSON.parse(request.responseText);
+                    if (response.complete_task && reloaded) {
+                        clearInterval(intervalId);
+                        return;
+                    } else {
+                        reloaded = false
+                        if (response.complete_task) {
+                            window.location.href = '/results/' + task_id;
+                        }
+                    }
+                } else {
+                    console.error('Request failed:', request.status);
+                }
+            }
+        };
+        request.send();
+    }, 5000);
+}
+
+checkStatus();

From 334baad749658e1557bb8d7797537393e1e3ae0d Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_virgo@mail.ru>
Date: Thu, 18 Apr 2024 21:15:43 +0300
Subject: [PATCH 13/28] message about time is changed

---
 app/templates/results.html | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/app/templates/results.html b/app/templates/results.html
index 11bcf03a..d5df46cd 100644
--- a/app/templates/results.html
+++ b/app/templates/results.html
@@ -24,8 +24,7 @@ <h3 id="results_title" class="texteous ins">
             {% endif %}
         {% else %}
             <h4 id="results_title" class="texteous ins">
-                <i>Производится проверка файла. Примерное время: {{ avg_process_time }} секунд (перезагрузите
-                    страницу)</i>
+                <i>Производится проверка файла, страница перезагрузится автоматически. Примерное время: {{ avg_process_time }} </i>
             </h4>
         {% endif %}
         <!-- Stat table -->

From abf76bd3e928c605ab25ab28848c943b02c74ff5 Mon Sep 17 00:00:00 2001
From: Marina <verika_iz_virgo@mail.ru>
Date: Thu, 18 Apr 2024 22:10:57 +0300
Subject: [PATCH 14/28] process time is changed

---
 app/db/db_methods.py       | 12 +++++++-----
 app/templates/results.html |  2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/app/db/db_methods.py b/app/db/db_methods.py
index b372d607..590098c9 100644
--- a/app/db/db_methods.py
+++ b/app/db/db_methods.py
@@ -407,11 +407,13 @@ def mark_celery_task_as_finished(celery_task_id, finished_time=None):
         '$set': {'finished_at': finished_time,
                  'processing_time': (finished_time - celery_task['started_at']).total_seconds()}})
 
-
-def get_average_processing_time(min_time=5.0, limit=10):
-    # TODO: use only success check (failed checks processing time is more bigger than normal)
-    result = list(celery_check_collection.aggregate(
-        [{'$limit': limit}, {'$group': {'_id': None, 'avg_processing_time': {'$avg': "$processing_time"}}}]))
+def get_average_processing_time(min_time=5.0, limit=100000):
+    # use only success check (failed checks processing time is more bigger than normal)
+    result = list(celery_check_collection.aggregate([
+        {'$sample': {'size': limit}},
+        {'$match': {'processing_time': {'$lt': 200}}},
+        {'$group': {'_id': None, 'avg_processing_time': {'$avg': "$processing_time"}}}
+    ]))
     if result and result[0]['avg_processing_time']:
         result = result[0]['avg_processing_time']
         if result > min_time:
diff --git a/app/templates/results.html b/app/templates/results.html
index d5df46cd..d71879d2 100644
--- a/app/templates/results.html
+++ b/app/templates/results.html
@@ -24,7 +24,7 @@ <h3 id="results_title" class="texteous ins">
             {% endif %}
         {% else %}
             <h4 id="results_title" class="texteous ins">
-                <i>Производится проверка файла, страница перезагрузится автоматически. Примерное время: {{ avg_process_time }} </i>
+                <i>Производится проверка файла, страница перезагрузится автоматически. Примерное время: {{ avg_process_time }} сек.</i>
             </h4>
         {% endif %}
         <!-- Stat table -->

From e39164bb016f716669a9fade08d4581e92cfde3d Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Fri, 19 Apr 2024 20:02:36 +0300
Subject: [PATCH 15/28] update get_average_processing_time: rm limit, set max
 time to 170

---
 app/db/db_methods.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/app/db/db_methods.py b/app/db/db_methods.py
index 590098c9..ad903e95 100644
--- a/app/db/db_methods.py
+++ b/app/db/db_methods.py
@@ -407,11 +407,10 @@ def mark_celery_task_as_finished(celery_task_id, finished_time=None):
         '$set': {'finished_at': finished_time,
                  'processing_time': (finished_time - celery_task['started_at']).total_seconds()}})
 
-def get_average_processing_time(min_time=5.0, limit=100000):
+def get_average_processing_time(min_time=5.0):
     # use only success check (failed checks processing time is more bigger than normal)
     result = list(celery_check_collection.aggregate([
-        {'$sample': {'size': limit}},
-        {'$match': {'processing_time': {'$lt': 200}}},
+        {'$match': {'processing_time': {'$lt': 170}}},
         {'$group': {'_id': None, 'avg_processing_time': {'$avg': "$processing_time"}}}
     ]))
     if result and result[0]['avg_processing_time']:

From b4201162efca9c69ebc1b4535d775eed2b1a911f Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Tue, 23 Apr 2024 18:18:42 +0300
Subject: [PATCH 16/28] revert get_status changes

---
 app/server.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/app/server.py b/app/server.py
index b4b611dc..0634be39 100644
--- a/app/server.py
+++ b/app/server.py
@@ -265,14 +265,10 @@ def recheck(check_id):
 @login_required
 def get_status(task_id):
     task_result = AsyncResult(task_id)
-    task = ObjectId(task_id)
-    check = db_methods.get_check(task)
-    complete_task = check.is_ended
     result = {
         "task_id": task_id,
         "task_status": task_result.status,
         "task_result": task_result.result,
-        "complete_task": complete_task
     }
     return jsonify(result), 200
 
@@ -289,7 +285,7 @@ def results(_id):
         avg_process_time = None if check.is_ended else db_methods.get_average_processing_time()
         return render_template("./results.html", navi_upload=True, results=check,
                                columns=TABLE_COLUMNS, avg_process_time=avg_process_time,
-                               stats=format_check(check.pack()), task_id = _id)
+                               stats=format_check(check.pack()))
     else:
         logger.info("Запрошенная проверка не найдена: " + _id)
         return render_template("./404.html")

From 855591f0f39146888d418bd5e27f989b5a831928 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Tue, 23 Apr 2024 18:24:14 +0300
Subject: [PATCH 17/28] add new api route for check ready result

---
 app/server.py              |  12 ++
 app/templates/results.html |   4 -
 assets/scripts/results.js  | 258 +++++++++++++++++++------------------
 3 files changed, 144 insertions(+), 130 deletions(-)

diff --git a/app/server.py b/app/server.py
index 0634be39..8a88dd7d 100644
--- a/app/server.py
+++ b/app/server.py
@@ -290,6 +290,18 @@ def results(_id):
         logger.info("Запрошенная проверка не найдена: " + _id)
         return render_template("./404.html")
 
+    
+@app.route("/api/results/ready/<string:_id>", methods=["GET"])
+def ready_result(_id):
+    try:
+        oid = ObjectId(_id)
+    except bson.errors.InvalidId:
+        logger.error('_id exception:', exc_info=True)
+        return {}
+    check = db_methods.get_check(oid)
+    if check is not None:
+        return {"is_ended": check.is_ended}
+
 
 @app.route("/checks/<string:_id>", methods=["GET"])
 @login_required
diff --git a/app/templates/results.html b/app/templates/results.html
index d75c2d1d..8f05f6f4 100644
--- a/app/templates/results.html
+++ b/app/templates/results.html
@@ -6,10 +6,6 @@
 {% block title %}Результаты проверки{% endblock %}
 
 {% block main %}
-<script>
-    const task_id = "{{ task_id }}";
-</script>
-
     <div class="header row">{% include "header.html" %}</div>
     <div class="holder row" id="results_holder">
         {% if results.is_ended %}
diff --git a/assets/scripts/results.js b/assets/scripts/results.js
index 7dd16833..408581e8 100644
--- a/assets/scripts/results.js
+++ b/assets/scripts/results.js
@@ -2,133 +2,139 @@ import '../styles/results.css';
 import * as pdfjsLib from 'pdfjs-dist';
 import pdfjsWorker from "pdfjs-dist/build/pdf.worker.entry";
 
-let pdfDoc,
-    pageNum,
-    pageIsRendering,
-    pageNumIsPending,
-    scale,
-    canvas,
-    ctx,
-    currentPage;
-
-pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsWorker;
-
-const renderPage = num => {
-    pageIsRendering = true;
-
-    pdfDoc.getPage(num).then(page => {
-        const viewport = page.getViewport({ scale });
-        canvas.height = viewport.height;
-        canvas.width = viewport.width;
-
-        const renderCtx = {
-            canvasContext: ctx,
-            viewport
-        };
-
-        page.render(renderCtx).promise.then(() => {
-            pageIsRendering = false;
-
-            if (pageNumIsPending !== null) {
-                renderPage(pageNumIsPending);
-                pageNumIsPending = null;
-            }
-        });
-
-        $('#page-num')[0].textContent = num;
-    });
-};
-
-const queueRenderPage = num => {
-    if (pageIsRendering) {
-        pageNumIsPending = num;
-    } else {
-        renderPage(num);
-    }
-};
-
-const showPrevPage = () => {
-    if (pageNum <= 1) {
-        return;
-    }
-    pageNum--;
-    queueRenderPage(pageNum);
-};
-
-const showNextPage = () => {
-    if (pageNum >= pdfDoc.numPages) {
-        return;
-    }
-    pageNum++;
-    queueRenderPage(pageNum);
-};
-
-const toggleAllVerdicts = () => {
-    $('.accordian-body').collapse('toggle');
-};
-
-if ($("#pdf_download").length !== 0) {
-    var href = $("#pdf_download").attr('href');
-    pdfDoc = null;
-    pageNum = 1;
-    pageIsRendering = false,
-        pageNumIsPending = null;
-    scale = 1.1;
-    canvas = $("#the-canvas")[0];
-    ctx = canvas.getContext("2d");
-    var href = $("#pdf_download").attr('href');
-    pdfDoc = null;
-    pageNum = 1;
-    pageIsRendering = false,
-        pageNumIsPending = null;
-    scale = 1.1;
-
-    canvas = document.getElementById('the-canvas');
-    ctx = canvas.getContext('2d');
-
-    pdfjsLib
-        .getDocument(href)
-        .promise.then(pdfDoc_ => {
-            pdfDoc = pdfDoc_;
-
-            $('#page-count')[0].textContent = pdfDoc.numPages;
-            renderPage(pageNum);
-        });
-
-    $('#prev-page').click(showPrevPage);
-    $('#next-page').click(showNextPage);
-}
-
-$('#showAllVerdicts').click(toggleAllVerdicts);
-
-
-// function for automatic reload page after checking:
-var reloaded = true
-
-function checkStatus() {
-    const intervalId = setInterval(() => {
-        var request = new XMLHttpRequest();
-        request.open('GET', '/tasks/' + task_id, true);
-        request.onreadystatechange = function () {
-            if (request.readyState === XMLHttpRequest.DONE) {
-                if (request.status === 200) {
-                    var response = JSON.parse(request.responseText);
-                    if (response.complete_task && reloaded) {
-                        clearInterval(intervalId);
-                        return;
-                    } else {
-                        reloaded = false
-                        if (response.complete_task) {
-                            window.location.href = '/results/' + task_id;
+$(function(){
+        if($("#stats_table").length > 0){
+            let pdfDoc,
+                pageNum,
+                pageIsRendering,
+                pageNumIsPending,
+                scale,
+                canvas,
+                ctx,
+                currentPage;
+            
+            pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsWorker;
+            
+            const renderPage = num => {
+                pageIsRendering = true;
+            
+                pdfDoc.getPage(num).then(page => {
+                    const viewport = page.getViewport({ scale });
+                    canvas.height = viewport.height;
+                    canvas.width = viewport.width;
+            
+                    const renderCtx = {
+                        canvasContext: ctx,
+                        viewport
+                    };
+            
+                    page.render(renderCtx).promise.then(() => {
+                        pageIsRendering = false;
+            
+                        if (pageNumIsPending !== null) {
+                            renderPage(pageNumIsPending);
+                            pageNumIsPending = null;
                         }
-                    }
+                    });
+            
+                    $('#page-num')[0].textContent = num;
+                });
+            };
+            
+            const queueRenderPage = num => {
+                if (pageIsRendering) {
+                    pageNumIsPending = num;
                 } else {
-                    console.error('Request failed:', request.status);
+                    renderPage(num);
+                }
+            };
+            
+            const showPrevPage = () => {
+                if (pageNum <= 1) {
+                    return;
                 }
+                pageNum--;
+                queueRenderPage(pageNum);
+            };
+            
+            const showNextPage = () => {
+                if (pageNum >= pdfDoc.numPages) {
+                    return;
+                }
+                pageNum++;
+                queueRenderPage(pageNum);
+            };
+            
+            const toggleAllVerdicts = () => {
+                $('.accordian-body').collapse('toggle');
+            };
+            
+            if ($("#pdf_download").length !== 0) {
+                var href = $("#pdf_download").attr('href');
+                pdfDoc = null;
+                pageNum = 1;
+                pageIsRendering = false,
+                    pageNumIsPending = null;
+                scale = 1.1;
+                canvas = $("#the-canvas")[0];
+                ctx = canvas.getContext("2d");
+                var href = $("#pdf_download").attr('href');
+                pdfDoc = null;
+                pageNum = 1;
+                pageIsRendering = false,
+                    pageNumIsPending = null;
+                scale = 1.1;
+            
+                canvas = document.getElementById('the-canvas');
+                ctx = canvas.getContext('2d');
+            
+                pdfjsLib
+                    .getDocument(href)
+                    .promise.then(pdfDoc_ => {
+                        pdfDoc = pdfDoc_;
+            
+                        $('#page-count')[0].textContent = pdfDoc.numPages;
+                        renderPage(pageNum);
+                    });
+            
+                $('#prev-page').click(showPrevPage);
+                $('#next-page').click(showNextPage);
             }
-        };
-        request.send();
-    }, 5000);
-}
-
-checkStatus();
+            
+            $('#showAllVerdicts').click(toggleAllVerdicts);
+        
+        // function for automatic reload page after checking:
+        let reloaded = true
+        
+        function checkStatus() {
+            const intervalId = setInterval(() => {
+                let request = new XMLHttpRequest();
+                const check_id = window.location.pathname.substr(window.location.pathname.lastIndexOf('/') + 1);
+                request.open('GET', '/api/results/ready/' + check_id, true);
+                request.onreadystatechange = function () {
+                    if (request.readyState === XMLHttpRequest.DONE) {
+                        if (request.status === 200) {
+                            let response = JSON.parse(request.responseText);
+                            console.log(response.is_ended)
+                            if (response.is_ended && reloaded) {
+                                clearInterval(intervalId);
+                                return;
+                            } else {
+                                reloaded = false
+                                if (response.is_ended) {
+                                    window.location.href = '/results/' + check_id;
+                                }
+                            }
+                        } else {
+                            console.error('Request failed:', request.status);
+                            clearInterval(intervalId);
+                        }
+                    }
+                };
+                request.send();
+            }, 5000);
+        }
+        
+        checkStatus();
+    }
+});

From a2ad613af841eb6a35d7254d0a249f6425695e3e Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Tue, 23 Apr 2024 18:44:14 +0300
Subject: [PATCH 18/28] preventing late request and early end of check

---
 assets/scripts/results.js | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/assets/scripts/results.js b/assets/scripts/results.js
index 408581e8..831f5dab 100644
--- a/assets/scripts/results.js
+++ b/assets/scripts/results.js
@@ -106,9 +106,8 @@ $(function(){
         // function for automatic reload page after checking:
         let reloaded = true
         
-        function checkStatus() {
-            const intervalId = setInterval(() => {
-                let request = new XMLHttpRequest();
+        function checkStatus(end_check_function){
+            let request = new XMLHttpRequest();
                 const check_id = window.location.pathname.substr(window.location.pathname.lastIndexOf('/') + 1);
                 request.open('GET', '/api/results/ready/' + check_id, true);
                 request.onreadystatechange = function () {
@@ -117,7 +116,7 @@ $(function(){
                             let response = JSON.parse(request.responseText);
                             console.log(response.is_ended)
                             if (response.is_ended && reloaded) {
-                                clearInterval(intervalId);
+                                end_check_function();
                                 return;
                             } else {
                                 reloaded = false
@@ -127,14 +126,20 @@ $(function(){
                             }
                         } else {
                             console.error('Request failed:', request.status);
-                            clearInterval(intervalId);
+                            end_check_function();
                         }
                     }
                 };
                 request.send();
+        }
+
+        function recheckStatus() {
+            const intervalId = setInterval(() => {
+                checkStatus(() => {clearInterval(intervalId)});
             }, 5000);
         }
         
-        checkStatus();
+        checkStatus(() => {});
+        recheckStatus()
     }
 });

From 7ec23d74e30b257330f50e6e79945f55ffdafc78 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Tue, 23 Apr 2024 20:31:41 +0300
Subject: [PATCH 19/28] update feedback for FindThemeInReport

---
 app/main/checks/report_checks/find_theme_in_report.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py
index 5edbca1a..38a5873a 100644
--- a/app/main/checks/report_checks/find_theme_in_report.py
+++ b/app/main/checks/report_checks/find_theme_in_report.py
@@ -53,11 +53,14 @@ def check(self):
         intersection = lemma_theme.intersection(self.full_text)
         value_intersection = round(len(intersection)*100//len(lemma_theme))
         if value_intersection == 0:
-            return answer(False, f"Не пройдена! В отчете не упоминаются слова, завяленные в теме отчета.")
-        elif 1 < value_intersection < self.limit:
-            return answer(False, f"Не пройдена! Процент упоминания темы в вашем отчете ({value_intersection} %) ниже требуемого ({self.limit} %).")
+            return answer(False, "Не пройдена! В отчете не упоминаются слова, заявленные в теме отчета.")
+        elif value_intersection < self.limit:
+            return answer(
+                          round(value_intersection/self.limit, 1),
+                          f"Частично пройдена! Процент упоминания темы в вашем отчете ({value_intersection} %) ниже требуемого ({self.limit} %)."
+            )
         else:
-            return answer (True, f'Пройдена! Процент упоминания темы в ответе: {value_intersection} %.')
+            return answer (True, f'Пройдена! Процент упоминания темы в отчете: {value_intersection} %.')
 
     def find_theme(self):
         stop_words = set(stopwords.words("russian"))

From 5c2567bded216f4847475851adf3d9d87db32444 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Tue, 23 Apr 2024 21:32:15 +0300
Subject: [PATCH 20/28] add labels to checks (+little fixes for max_size)

---
 app/main/checks/report_checks/find_theme_in_report.py    | 2 +-
 app/main/checks/report_checks/max_abstract_size_check.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py
index 38a5873a..56dd9a00 100644
--- a/app/main/checks/report_checks/find_theme_in_report.py
+++ b/app/main/checks/report_checks/find_theme_in_report.py
@@ -13,7 +13,7 @@
 
 
 class FindThemeInReport(BaseReportCriterion):
-
+    label = "Проверка упоминания темы в отчете"
     description = "Проверка упоминания темы в отчете"
     id = 'theme_in_report_check'
 
diff --git a/app/main/checks/report_checks/max_abstract_size_check.py b/app/main/checks/report_checks/max_abstract_size_check.py
index fbeaf8bb..7cba444e 100644
--- a/app/main/checks/report_checks/max_abstract_size_check.py
+++ b/app/main/checks/report_checks/max_abstract_size_check.py
@@ -2,19 +2,19 @@
 
 
 class ReportMaxSizeOfAbstractCheck(BaseReportCriterion):
-    description = "Максимальный размер раздела Реферат в ВКР"
+    label = "Максимальный размер раздела Реферат в ВКР"
+    description = "Максимальный размер раздела Реферат в ВКР (1 стр.)"
     id = "max_abstract_size_check"
 
-    def __init__(self, file_info):
+    def __init__(self, file_info, max_size=1):
         super().__init__(file_info)
         self.headers = []
         self.referat_size = 0
         self.abstract_size = 0
-        self.max_size = 0
+        self.max_size = max_size
 
     def late_init(self):
         self.headers = self.file.make_headers(self.file_type['report_type'])
-        self.max_size = 1
         referat_page = 0
         abstract_page = 0
         main_page = 0

From 6e0121a99fb48469d05f342109ed6fa82a6f89a5 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Tue, 23 Apr 2024 21:49:19 +0300
Subject: [PATCH 21/28] split template_name check for pres and report

---
 .../checks/presentation_checks/__init__.py    |  2 +-
 .../presentation_checks/template_name.py      |  2 +-
 app/main/checks/report_checks/__init__.py     |  1 +
 .../checks/report_checks/template_name.py     | 25 +++++++++++++++++++
 4 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 app/main/checks/report_checks/template_name.py

diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py
index 90e517a5..d605c1d3 100644
--- a/app/main/checks/presentation_checks/__init__.py
+++ b/app/main/checks/presentation_checks/__init__.py
@@ -5,7 +5,7 @@
 from .sld_enum import SldEnumCheck
 from .sld_num import SldNumCheck
 from .sld_similarity import SldSimilarity
-from .template_name import TemplateNameCheck
+from .template_name import PresTemplateNameCheck
 from .title_format import TitleFormatCheck
 from .pres_right_words import PresRightWordsCheck
 from .image_share import PresImageShareCheck
diff --git a/app/main/checks/presentation_checks/template_name.py b/app/main/checks/presentation_checks/template_name.py
index eda96303..f73aa7bb 100644
--- a/app/main/checks/presentation_checks/template_name.py
+++ b/app/main/checks/presentation_checks/template_name.py
@@ -3,7 +3,7 @@
 from ..base_check import BasePresCriterion, answer
 
 
-class TemplateNameCheck(BasePresCriterion):
+class PresTemplateNameCheck(BasePresCriterion):
     label = "Проверка соответствия названия файла шаблону"
     description = 'Шаблон названия: "Презентация_ВКР_Иванов", "ПРЕЗЕНТАЦИЯ_НИР_ИВАНОВ"'
     id = 'template_name'
diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py
index cb3118de..50729335 100644
--- a/app/main/checks/report_checks/__init__.py
+++ b/app/main/checks/report_checks/__init__.py
@@ -23,3 +23,4 @@
 from .style_check import ReportStyleCheck
 from .spelling_check import SpellingCheck
 from .max_abstract_size_check import ReportMaxSizeOfAbstractCheck
+from .template_name import ReportTemplateNameCheck
\ No newline at end of file
diff --git a/app/main/checks/report_checks/template_name.py b/app/main/checks/report_checks/template_name.py
new file mode 100644
index 00000000..9b6e88d7
--- /dev/null
+++ b/app/main/checks/report_checks/template_name.py
@@ -0,0 +1,25 @@
+import re
+from datetime import datetime
+
+
+from ..base_check import BasePresCriterion, answer
+
+CUR_YEAR = datetime.now().year
+
+
+class ReportTemplateNameCheck(BasePresCriterion):
+    label = "Проверка соответствия названия файла шаблону"
+    description = f'Шаблон названия: "{CUR_YEAR}ВКР<номер_студ_билета>ФАМИЛИЯ", например "{CUR_YEAR}ВКР111111ИВАНОВ"'
+    id = 'template_name'
+
+    def __init__(self, file_info, regex=f"{CUR_YEAR}ВКР[0-9]{{6}}([А-ЯЁ]+)"):  # doubled braces keep the regex quantifier {6} literal inside the f-string
+        super().__init__(file_info)
+        self.filename = self.filename.split('.', 1)[0]
+        self.reg = regex
+
+    def check(self):
+        if re.fullmatch(self.reg, self.filename):
+            return answer(True, "Пройдена!")
+        else:
+            return answer(False,
+                          f'Название файла презентации "<i>{self.filename}</i>" не соответствует шаблону: {self.reg}')

From 10727a8c6d4ab8b89053c29d566cc8281afcdd0e Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Tue, 23 Apr 2024 22:04:36 +0300
Subject: [PATCH 22/28] update information for ReportMainCharacterCheck

---
 app/main/checks/report_checks/main_character_check.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/app/main/checks/report_checks/main_character_check.py b/app/main/checks/report_checks/main_character_check.py
index e29fc7ec..723fb536 100644
--- a/app/main/checks/report_checks/main_character_check.py
+++ b/app/main/checks/report_checks/main_character_check.py
@@ -3,12 +3,12 @@
 
 class ReportMainCharacterCheck(BaseReportCriterion):
     label = "Проверка фамилии и должности заведующего кафедрой"
-    description = 'И.о. зав. кафедрой: А.А. Лисс'
+    description = 'Зав. кафедрой: А.А. Лисс'
     id = 'main_character_check'
     priority = True
 
     def __init__(self, file_info, main_character_name_right="А.А. Лисс", main_character_name_wrong="К.В. Кринкин",
-                 main_character_job_right="И.о. зав. кафедрой", main_character_job_wrong="Зав. кафедрой"):
+                 main_character_job_right="Зав. кафедрой", main_character_job_wrong="И.о. зав. кафедрой"):
         super().__init__(file_info)
         self.headers = []
         self.main_character_name_right = main_character_name_right
@@ -31,10 +31,10 @@ def check(self):
                 if text_on_page.find(self.main_character_name_wrong) >= 0 and not text_on_page.find(
                         self.main_character_name_right) >= 0:
                     result_str += f"На странице {self.format_page_link([page])} указана неверная фамилия заведующего " \
-                                  f"кафедрой. Убедитесь, что И.о. зав. кафедрой {self.main_character_name_right}.<br>"
+                                  f"кафедрой. Убедитесь, что {self.main_character_job_right} {self.main_character_name_right}.<br>"
                 elif not text_on_page.find(self.main_character_name_right) >= 0:
                     result_str += f"На странице {self.format_page_link([page])} не указано ФИО заведующего кафедрой, в " \
-                                  f"графе И.о. зав. кафедрой должно быть указано {self.main_character_name_right}.<br>"
+                                  f"графе {self.main_character_job_right} должно быть указано {self.main_character_name_right}.<br>"
                 if text_on_page.find(self.main_character_job_wrong) >= 0 and not text_on_page.find(
                         self.main_character_job_right) >= 0:
                     result_str += f'На странице {self.format_page_link([page])} указана неверная должность ' \

From 54f777177cd8517faefe8270727285710ee8cf72 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Tue, 23 Apr 2024 22:41:36 +0300
Subject: [PATCH 23/28] little update for ReportPageCounter

---
 app/main/checks/report_checks/page_counter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/main/checks/report_checks/page_counter.py b/app/main/checks/report_checks/page_counter.py
index bd4952e4..fda1447d 100755
--- a/app/main/checks/report_checks/page_counter.py
+++ b/app/main/checks/report_checks/page_counter.py
@@ -3,11 +3,11 @@
 
 class ReportPageCounter(BaseReportCriterion):
     label = "Проверка количества страниц в файле"
-    description = 'Количество страниц должно быть больше 50ти, не считая "Приложения"'
+    description = 'Количество страниц должно быть в допустимых рамках, не считая "Приложения"'
     id = 'page_counter'
     priority = True
 
-    def __init__(self, file_info, min_number=50, max_number=None):
+    def __init__(self, file_info, min_number=50, max_number=150):
         super().__init__(file_info)
         self.number = [min_number, max_number]
 

From b185e02ad718a89658d4a1772d67489c6f328f02 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Wed, 24 Apr 2024 00:08:58 +0300
Subject: [PATCH 24/28] update feedback ReportTemplateNameCheck

---
 app/main/checks/report_checks/template_name.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/main/checks/report_checks/template_name.py b/app/main/checks/report_checks/template_name.py
index 9b6e88d7..4e3991eb 100644
--- a/app/main/checks/report_checks/template_name.py
+++ b/app/main/checks/report_checks/template_name.py
@@ -22,4 +22,4 @@ def check(self):
             return answer(True, "Пройдена!")
         else:
             return answer(False,
-                          f'Название файла презентации "<i>{self.filename}</i>" не соответствует шаблону: {self.reg}')
+                          f'Название файла презентации "<i>{self.filename}</i>" не соответствует шаблону (Пример: {CUR_YEAR}030301ИВАНОВ)')

From 0086c64cdd39b11c40e7ab0945a26f0635c8447e Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Wed, 24 Apr 2024 00:17:56 +0300
Subject: [PATCH 25/28] add final recheck route

---
 app/server.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/app/server.py b/app/server.py
index 8a88dd7d..9cb6ea69 100644
--- a/app/server.py
+++ b/app/server.py
@@ -252,13 +252,21 @@ def recheck(check_id):
 
     if not check:
         abort(404)
+    
+    # write files (original and pdf) to filestorage
     filepath = join(UPLOAD_FOLDER, f"{check_id}.{check.filename.rsplit('.', 1)[-1]}")
+    pdf_filepath = join(UPLOAD_FOLDER, f"{check_id}.pdf")
+    db_methods.write_file_from_db_file(oid, filepath)
+    db_methods.write_file_from_db_file(ObjectId(check.conv_pdf_fs_id), pdf_filepath)
+    
     check.is_ended = False
     db_methods.update_check(check)
-    db_methods.write_file_from_db_file(oid, filepath)
     task = create_task.delay(check.pack(to_str=True))  # add check to queue
     db_methods.add_celery_task(task.id, check_id)  # mapping celery_task to check (check_id = file_id)
-    return {'task_id': task.id, 'check_id': check_id}
+    if request.args.get('api'):
+        return {'task_id': task.id, 'check_id': check_id}
+    else:
+        return redirect(url_for('results', _id=check_id))
 
 
 @app.route("/tasks/<task_id>", methods=["GET"])

From 63979c0d3294c98334336e6b0f4a8666ee8c5ea9 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Wed, 24 Apr 2024 02:53:03 +0300
Subject: [PATCH 26/28] increase ready check timeout

---
 assets/scripts/results.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/scripts/results.js b/assets/scripts/results.js
index 831f5dab..3f43d45b 100644
--- a/assets/scripts/results.js
+++ b/assets/scripts/results.js
@@ -136,7 +136,7 @@ $(function(){
         function recheckStatus() {
             const intervalId = setInterval(() => {
                 checkStatus(() => {clearInterval(intervalId)});
-            }, 5000);
+            }, 10000);
         }
         
         checkStatus(() => {});

From 99d368dcaa4685a7bfb91ac9571562ee9028bc17 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Wed, 24 Apr 2024 02:53:48 +0300
Subject: [PATCH 27/28] update ReportImageShareCheck

---
 app/main/checks/report_checks/image_share_check.py | 4 ++--
 app/main/reports/docx_uploader/docx_uploader.py    | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/main/checks/report_checks/image_share_check.py b/app/main/checks/report_checks/image_share_check.py
index 0ef4fa1c..c96e3c1e 100644
--- a/app/main/checks/report_checks/image_share_check.py
+++ b/app/main/checks/report_checks/image_share_check.py
@@ -12,8 +12,8 @@ def __init__(self, file_info, limit=0.3):
     def check(self):
         if self.file.page_counter() < 4:
             return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
-        images_height = self.file.pdf_file.page_images(page_without_pril=self.file.count)
-        available_space = self.file.pdf_file.page_height(page_without_pril=self.file.count)
+        images_height = self.file.pdf_file.page_images(page_without_pril=self.file.page_count)
+        available_space = self.file.pdf_file.page_height(page_without_pril=self.file.page_count)
 
         images_value = images_height/available_space
 
diff --git a/app/main/reports/docx_uploader/docx_uploader.py b/app/main/reports/docx_uploader/docx_uploader.py
index 18d901b1..ac30dee4 100644
--- a/app/main/reports/docx_uploader/docx_uploader.py
+++ b/app/main/reports/docx_uploader/docx_uploader.py
@@ -22,6 +22,7 @@ def __init__(self):
         self.file = None
         self.special_paragraph_indices = {}
         self.headers_page = 0
+        self.page_count = 0
 
     def upload(self, file, pdf_filepath=''):
         self.file = docx.Document(file)

From 63e68482cdd2f6b1ea039ffc165cbb5ff9fe8fb5 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <darcenrall@gmail.com>
Date: Wed, 24 Apr 2024 15:42:27 +0300
Subject: [PATCH 28/28] update ReportTemplateNameCheck feedback

---
 app/main/checks/report_checks/template_name.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/main/checks/report_checks/template_name.py b/app/main/checks/report_checks/template_name.py
index 4e3991eb..24c245fd 100644
--- a/app/main/checks/report_checks/template_name.py
+++ b/app/main/checks/report_checks/template_name.py
@@ -22,4 +22,4 @@ def check(self):
             return answer(True, "Пройдена!")
         else:
             return answer(False,
-                          f'Название файла презентации "<i>{self.filename}</i>" не соответствует шаблону (Пример: {CUR_YEAR}030301ИВАНОВ)')
+                          f'Название файла презентации "<i>{self.filename}</i>" не соответствует шаблону (Пример: {CUR_YEAR}ВКР030301ИВАНОВ)')