Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
HadronCollider committed May 20, 2024
2 parents 67e35fc + c3101e0 commit f54ac44
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 15 deletions.
6 changes: 2 additions & 4 deletions app/main/checks/presentation_checks/sld_similarity.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from nlp.similarity_of_texts import check_similarity
from utils import get_text_from_slides, tasks_conclusions_feedback

from app.nlp.similarity_of_texts import check_similarity
from app.nlp.stemming import Stemming
from ..base_check import BasePresCriterion, answer


Expand All @@ -18,8 +18,6 @@ def __init__(self, file_info, goals='Цель и задачи', conclusion='За
def check(self):
goals = get_text_from_slides(self.file, self.goals)
conclusions = get_text_from_slides(self.file, self.conclusion)
if goals == "" or conclusions == "":
return answer(False, 'Задач или заключения не существует')

results = check_similarity(goals, conclusions)

Expand Down
10 changes: 7 additions & 3 deletions app/main/checks/report_checks/find_theme_in_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@ def find_theme(self):
if key == 1:
lower_text = text_on_page.lower()
text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation))
list_full = text_without_punct.split()
start = list_full.index('тема') + 1
end = list_full.index('студент')
list_full = tuple(text_without_punct.split())
start, end = 0, len(list_full)
for index, value in enumerate(list_full):
if value == "тема":
start = index + 1
elif value in {"студент", "студентка"}:
end = index
list_theme = list_full[start:end]
lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if
word not in stop_words}
Expand Down
10 changes: 8 additions & 2 deletions app/main/checks/report_checks/image_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,18 @@ def check(self):
if not len(self.headers):
return answer(False, "Не найдено ни одного заголовка.<br><br>Проверьте корректность использования стилей.")
number_of_images, all_numbers = self.count_images_vkr()
if not number_of_images:
count_file_image_object = self.file.pdf_file.get_image_num()
if count_file_image_object and not number_of_images:
return answer(False, f'В отчёте найдено {count_file_image_object} рисунков, но не найдено ни одной подписи рисунка.<br><br> Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был '
f'использован стиль {self.image_style}, и формат: '
f'"Рисунок <Номер рисунка> — <Название рисунка>".')
elif not number_of_images:
return answer(True, f'Не найдено ни одного рисунка.<br><br> Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был '
f'использован стиль {self.image_style}, и формат: '
f'"Рисунок <Номер рисунка> -- <Название рисунка>".')
f'"Рисунок <Номер рисунка> <Название рисунка>".')
else:
return answer(False, 'Во время обработки произошла критическая ошибка')

references = self.search_references()
if len(references.symmetric_difference(all_numbers)) == 0:
return answer(True, f"Пройдена!")
Expand Down
3 changes: 3 additions & 0 deletions app/main/reports/pdf_document/pdf_document_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def get_text_on_page(self):
# def get_text_on_page(self):
# return {page + 1: self.pages[page].extract_text() for page in range(self.page_count_all)}

def get_image_num(self):
    """Return the number of image entries found across all pages of the PDF.

    The earlier version passed a fixed page index of 0 to
    ``get_page_images`` and therefore reported only the images of the
    first page, while the caller presents the value as the number of
    images in the whole report.

    NOTE(review): assumes ``self.pdf_file`` is a PyMuPDF ``Document``
    (it exposes ``get_page_images`` and ``page_count``) -- confirm.
    An image that is referenced on several pages is counted once per page.

    Returns:
        int: total count of per-page image entries; 0 for a document
        without pages or without images.
    """
    document = self.pdf_file
    return sum(
        len(document.get_page_images(page_num))
        for page_num in range(document.page_count)
    )

def page_images(self, page_without_pril):
total_height = 0
for page_num in range(page_without_pril):
Expand Down
2 changes: 1 addition & 1 deletion app/nlp/find_tasks_on_slides.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def compare_sentences(sentence_1, sentence_2):
stemming = Stemming()
set_1 = stemming.get_filtered_docs(sentence_1, False)
set_2 = stemming.get_filtered_docs(sentence_2, False)
rvector = set_1.union(set_2)
rvector = set_1 #.union(set_2)
vector_1 = [w in set_1 for w in rvector]
vector_2 = [w in set_2 for w in rvector]
cosine_similarity = 1 - distance.cosine(vector_1, vector_2)
Expand Down
16 changes: 11 additions & 5 deletions app/nlp/similarity_of_texts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@

def check_similarity(string1, string2):
stemming = Stemming()

stemming.parse_text(string2, False)
further_dev = stemming.further_dev()
base_conclusions = stemming.get_sentences(string2, False)
base_conclusions = stemming.sentences
ignore = re.compile('[0-9]+[.]?|Заключение|‹#›')
clear_conclusions = [ch for ch in base_conclusions if not re.fullmatch(ignore, ch)]
recognized_conclusions = [s for s in clear_conclusions if s != further_dev.get('dev_sentence')]
conclusions = [ch for ch in base_conclusions if not re.fullmatch(ignore, ch)]
cleaned_conclusions = "\n".join(s for s in conclusions if s != further_dev.get('dev_sentence'))

tasks = stemming.get_sentences(string1, True)
ignore = re.compile('[0-9][.]?|Задачи:|‹#›') # [:]?
cleaned_tasks = "\n".join(task for task in tasks if not re.fullmatch(ignore, task))

percentage_of_similarity = int(compare_sentences(string1, string2) * 100)
percentage_of_similarity = int(compare_sentences(cleaned_tasks, cleaned_conclusions) * 100)

return percentage_of_similarity, further_dev, recognized_conclusions
return percentage_of_similarity, further_dev, conclusions

0 comments on commit f54ac44

Please sign in to comment.