diff --git a/app/main/checks/presentation_checks/sld_similarity.py b/app/main/checks/presentation_checks/sld_similarity.py
index 2d9d1db1..de8917ea 100644
--- a/app/main/checks/presentation_checks/sld_similarity.py
+++ b/app/main/checks/presentation_checks/sld_similarity.py
@@ -1,6 +1,6 @@
-from nlp.similarity_of_texts import check_similarity
from utils import get_text_from_slides, tasks_conclusions_feedback
-
+from app.nlp.similarity_of_texts import check_similarity
+from app.nlp.stemming import Stemming
from ..base_check import BasePresCriterion, answer
@@ -18,8 +18,6 @@ def __init__(self, file_info, goals='Цель и задачи', conclusion='За
def check(self):
goals = get_text_from_slides(self.file, self.goals)
conclusions = get_text_from_slides(self.file, self.conclusion)
- if goals == "" or conclusions == "":
- return answer(False, 'Задач или заключения не существует')
results = check_similarity(goals, conclusions)
diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py
index 56dd9a00..9602e44f 100644
--- a/app/main/checks/report_checks/find_theme_in_report.py
+++ b/app/main/checks/report_checks/find_theme_in_report.py
@@ -69,9 +69,13 @@ def find_theme(self):
if key == 1:
lower_text = text_on_page.lower()
text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation))
- list_full = text_without_punct.split()
- start = list_full.index('тема') + 1
- end = list_full.index('студент')
+ list_full = tuple(text_without_punct.split())
+ start, end = 0, len(list_full)
+ for index, value in enumerate(list_full):
+ if value == "тема":
+ start = index + 1
+ elif value in {"студент", "студентка"}:
+ end = index
list_theme = list_full[start:end]
lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if
word not in stop_words}
diff --git a/app/main/checks/report_checks/image_references.py b/app/main/checks/report_checks/image_references.py
index 72a1a284..f9f68e6b 100644
--- a/app/main/checks/report_checks/image_references.py
+++ b/app/main/checks/report_checks/image_references.py
@@ -26,12 +26,18 @@ def check(self):
if not len(self.headers):
return answer(False, "Не найдено ни одного заголовка.
Проверьте корректность использования стилей.")
number_of_images, all_numbers = self.count_images_vkr()
- if not number_of_images:
+ count_file_image_object = self.file.pdf_file.get_image_num()
+ if count_file_image_object and not number_of_images:
+ return answer(False, f'В отчёте найдено {count_file_image_object} рисунков, но не найдено ни одной подписи рисунка.
Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был '
+ f'использован стиль {self.image_style}, и формат: '
+ f'"Рисунок <Номер рисунка> — <Название рисунка>".')
+ elif not number_of_images:
return answer(True, f'Не найдено ни одного рисунка.
Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был '
f'использован стиль {self.image_style}, и формат: '
- f'"Рисунок <Номер рисунка> -- <Название рисунка>".')
+ f'"Рисунок <Номер рисунка> — <Название рисунка>".')
else:
return answer(False, 'Во время обработки произошла критическая ошибка')
+
references = self.search_references()
if len(references.symmetric_difference(all_numbers)) == 0:
return answer(True, f"Пройдена!")
diff --git a/app/main/reports/pdf_document/pdf_document_manager.py b/app/main/reports/pdf_document/pdf_document_manager.py
index b9471179..f6754557 100644
--- a/app/main/reports/pdf_document/pdf_document_manager.py
+++ b/app/main/reports/pdf_document/pdf_document_manager.py
@@ -27,6 +27,9 @@ def get_text_on_page(self):
# def get_text_on_page(self):
# return {page + 1: self.pages[page].extract_text() for page in range(self.page_count_all)}
+ def get_image_num(self):
+     """Return the number of image entries found on all pages of the PDF.
+
+     The per-page image list is requested for every page index and the
+     lengths are summed; querying index 0 alone would only cover the
+     first page of the report.
+
+     NOTE(review): assumes ``self.pdf_file`` exposes ``page_count`` next to
+     ``get_page_images`` (as a PyMuPDF ``Document`` does) - confirm.
+     """
+     return sum(
+         len(self.pdf_file.get_page_images(page_number))
+         for page_number in range(self.pdf_file.page_count)
+     )
+
def page_images(self, page_without_pril):
total_height = 0
for page_num in range(page_without_pril):
diff --git a/app/nlp/find_tasks_on_slides.py b/app/nlp/find_tasks_on_slides.py
index 02571718..ef027d48 100644
--- a/app/nlp/find_tasks_on_slides.py
+++ b/app/nlp/find_tasks_on_slides.py
@@ -12,7 +12,7 @@ def compare_sentences(sentence_1, sentence_2):
stemming = Stemming()
set_1 = stemming.get_filtered_docs(sentence_1, False)
set_2 = stemming.get_filtered_docs(sentence_2, False)
- rvector = set_1.union(set_2)
+ rvector = set_1 #.union(set_2)
vector_1 = [w in set_1 for w in rvector]
vector_2 = [w in set_2 for w in rvector]
cosine_similarity = 1 - distance.cosine(vector_1, vector_2)
diff --git a/app/nlp/similarity_of_texts.py b/app/nlp/similarity_of_texts.py
index e537bc60..3dc9cf26 100644
--- a/app/nlp/similarity_of_texts.py
+++ b/app/nlp/similarity_of_texts.py
@@ -6,12 +6,18 @@
def check_similarity(string1, string2):
+ """Compare the tasks text with the conclusions text.
+
+ Args:
+     string1: text that is split into task sentences.
+     string2: text that is parsed into conclusion sentences.
+
+ Returns:
+     A tuple of the similarity percentage (int), the result of
+     ``stemming.further_dev()`` and the list of conclusion sentences
+     that were not matched by the ignore pattern.
+ """
stemming = Stemming()
+
+ stemming.parse_text(string2, False)
further_dev = stemming.further_dev()
- base_conclusions = stemming.get_sentences(string2, False)
+ base_conclusions = stemming.sentences
ignore = re.compile('[0-9]+[.]?|Заключение|‹#›')
- clear_conclusions = [ch for ch in base_conclusions if not re.fullmatch(ignore, ch)]
- recognized_conclusions = [s for s in clear_conclusions if s != further_dev.get('dev_sentence')]
+ conclusions = [ch for ch in base_conclusions if not re.fullmatch(ignore, ch)]
+ cleaned_conclusions = "\n".join(s for s in conclusions if s != further_dev.get('dev_sentence'))
+
+ tasks = stemming.get_sentences(string1, True)
+ # One or more digits, so multi-digit item numbers ("10.") are dropped as well,
+ # in line with the pattern used for the conclusions above.
+ # NOTE(review): the ':' after the heading word is mandatory here - confirm
+ # whether it should be optional.
+ ignore = re.compile('[0-9]+[.]?|Задачи:|‹#›')
+ cleaned_tasks = "\n".join(task for task in tasks if not re.fullmatch(ignore, task))
- percentage_of_similarity = int(compare_sentences(string1, string2) * 100)
+ percentage_of_similarity = int(compare_sentences(cleaned_tasks, cleaned_conclusions) * 100)
- return percentage_of_similarity, further_dev, recognized_conclusions
+ return percentage_of_similarity, further_dev, conclusions