diff --git a/app/main/checks/presentation_checks/sld_similarity.py b/app/main/checks/presentation_checks/sld_similarity.py index 2d9d1db1..de8917ea 100644 --- a/app/main/checks/presentation_checks/sld_similarity.py +++ b/app/main/checks/presentation_checks/sld_similarity.py @@ -1,6 +1,6 @@ -from nlp.similarity_of_texts import check_similarity from utils import get_text_from_slides, tasks_conclusions_feedback - +from app.nlp.similarity_of_texts import check_similarity +from app.nlp.stemming import Stemming from ..base_check import BasePresCriterion, answer @@ -18,8 +18,6 @@ def __init__(self, file_info, goals='Цель и задачи', conclusion='За def check(self): goals = get_text_from_slides(self.file, self.goals) conclusions = get_text_from_slides(self.file, self.conclusion) - if goals == "" or conclusions == "": - return answer(False, 'Задач или заключения не существует') results = check_similarity(goals, conclusions) diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py index 56dd9a00..9602e44f 100644 --- a/app/main/checks/report_checks/find_theme_in_report.py +++ b/app/main/checks/report_checks/find_theme_in_report.py @@ -69,9 +69,13 @@ def find_theme(self): if key == 1: lower_text = text_on_page.lower() text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation)) - list_full = text_without_punct.split() - start = list_full.index('тема') + 1 - end = list_full.index('студент') + list_full = tuple(text_without_punct.split()) + start, end = 0, len(list_full) + for index, value in enumerate(list_full): + if value == "тема": + start = index + 1 + elif value in {"студент", "студентка"}: + end = index list_theme = list_full[start:end] lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if word not in stop_words} diff --git a/app/main/checks/report_checks/image_references.py b/app/main/checks/report_checks/image_references.py index 72a1a284..f9f68e6b 100644 --- a/app/main/checks/report_checks/image_references.py +++ b/app/main/checks/report_checks/image_references.py @@ -26,12 +26,18 @@ def check(self): if not len(self.headers): return answer(False, "Не найдено ни одного заголовка.

Проверьте корректность использования стилей.") number_of_images, all_numbers = self.count_images_vkr() - if not number_of_images: + count_file_image_object = self.file.pdf_file.get_image_num() + if count_file_image_object and not number_of_images: + return answer(False, f'В отчёте найдено {count_file_image_object} рисунков, но не найдено ни одной подписи рисунка.

Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был ' + f'использован стиль {self.image_style}, и формат: ' + f'"Рисунок <Номер рисунка> — <Название рисунка>".') + elif not number_of_images: return answer(True, f'Не найдено ни одного рисунка.

Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был ' f'использован стиль {self.image_style}, и формат: ' - f'"Рисунок <Номер рисунка> -- <Название рисунка>".') + f'"Рисунок <Номер рисунка> — <Название рисунка>".') else: return answer(False, 'Во время обработки произошла критическая ошибка') + references = self.search_references() if len(references.symmetric_difference(all_numbers)) == 0: return answer(True, f"Пройдена!") diff --git a/app/main/reports/pdf_document/pdf_document_manager.py b/app/main/reports/pdf_document/pdf_document_manager.py index b9471179..f6754557 100644 --- a/app/main/reports/pdf_document/pdf_document_manager.py +++ b/app/main/reports/pdf_document/pdf_document_manager.py @@ -27,6 +27,9 @@ def get_text_on_page(self): # def get_text_on_page(self): # return {page + 1: self.pages[page].extract_text() for page in range(self.page_count_all)} + def get_image_num(self): + return len(self.pdf_file.get_page_images(0)) + def page_images(self, page_without_pril): total_height = 0 for page_num in range(page_without_pril): diff --git a/app/nlp/find_tasks_on_slides.py b/app/nlp/find_tasks_on_slides.py index 02571718..ef027d48 100644 --- a/app/nlp/find_tasks_on_slides.py +++ b/app/nlp/find_tasks_on_slides.py @@ -12,7 +12,7 @@ def compare_sentences(sentence_1, sentence_2): stemming = Stemming() set_1 = stemming.get_filtered_docs(sentence_1, False) set_2 = stemming.get_filtered_docs(sentence_2, False) - rvector = set_1.union(set_2) + rvector = set_1 #.union(set_2) vector_1 = [w in set_1 for w in rvector] vector_2 = [w in set_2 for w in rvector] cosine_similarity = 1 - distance.cosine(vector_1, vector_2) diff --git a/app/nlp/similarity_of_texts.py b/app/nlp/similarity_of_texts.py index e537bc60..3dc9cf26 100644 --- a/app/nlp/similarity_of_texts.py +++ b/app/nlp/similarity_of_texts.py @@ -6,12 +6,18 @@ def check_similarity(string1, string2): stemming = Stemming() + + stemming.parse_text(string2, False) further_dev = stemming.further_dev() - base_conclusions = stemming.get_sentences(string2, False) + base_conclusions = stemming.sentences ignore = re.compile('[0-9]+[.]?|Заключение|‹#›') - clear_conclusions = [ch for ch in base_conclusions if not re.fullmatch(ignore, ch)] - recognized_conclusions = [s for s in clear_conclusions if s != further_dev.get('dev_sentence')] + conclusions = [ch for ch in base_conclusions if not re.fullmatch(ignore, ch)] + cleaned_conclusions = "\n".join(s for s in conclusions if s != further_dev.get('dev_sentence')) + + tasks = stemming.get_sentences(string1, True) + ignore = re.compile('[0-9][.]?|Задачи:|‹#›') # [:]? + cleaned_tasks = "\n".join(task for task in tasks if not re.fullmatch(ignore, task)) - percentage_of_similarity = int(compare_sentences(string1, string2) * 100) + percentage_of_similarity = int(compare_sentences(cleaned_tasks, cleaned_conclusions) * 100) - return percentage_of_similarity, further_dev, recognized_conclusions + return percentage_of_similarity, further_dev, conclusions