-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
23 changed files
with
491 additions
and
185 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
|
||
from ..base_check import BasePresCriterion, answer | ||
from .find_def_sld import FindDefSld | ||
from app.nlp.stemming import Stemming | ||
|
||
import string | ||
import nltk | ||
from nltk.tokenize import word_tokenize, sent_tokenize | ||
from nltk.corpus import stopwords | ||
from pymorphy2 import MorphAnalyzer | ||
|
||
|
||
MORPH_ANALYZER = MorphAnalyzer() | ||
|
||
|
||
class FindThemeInPres(BasePresCriterion):
    """Check that the presentation theme is mentioned in slide titles.

    The theme is taken from the first title remaining after skipping the
    title slide and any slides whose titles match ``skip_slides_titles``
    (e.g. "Заключение").
    """

    label = "Проверка упоминания темы в заголовках презентации"
    description = """Проверка упоминания темы в заголовках презентации, не включая титульный слайд, слайды "Цели и задачи", "Заключение" """
    id = 'theme_in_pres_check'

    def __init__(self, file_info, skip_slides_nums=(1,), skip_slides_titles=("Заключение",), limit=60):
        """
        :param file_info: presentation descriptor handed to the base criterion
        :param skip_slides_nums: 1-based slide numbers excluded from the check
        :param skip_slides_titles: slide titles whose slides are also excluded
        :param limit: minimal mention percentage required to fully pass
        """
        super().__init__(file_info)
        self.skip_slides_title = skip_slides_titles
        # Resolve the slide indexes of the "skip" titles via FindDefSld.
        slides = []
        for title in self.skip_slides_title:
            find_sld = FindDefSld(file_info=file_info, key_slide=title)
            find_sld.check()
            slides.extend(find_sld.found_idxs)
        self.skip_slides = [
            *skip_slides_nums,
            *slides
        ]
        self.limit = limit

    def check(self):
        """Return an ``answer`` with the percentage of theme lemmas found in slides."""
        stop_words = set(stopwords.words("russian"))

        text_from_title = [slide for page, slide in enumerate(self.file.get_titles(), 1) if page not in self.skip_slides]
        if not text_from_title:
            # Guard: original code raised IndexError when every slide was skipped.
            return answer(False, "Не пройдена! Не найдены заголовки слайдов для проверки.")
        # The original ''.join(word for word in s) over a string is a no-op;
        # joining the sequence directly is equivalent for both str and list input.
        theme = ''.join(text_from_title[0])

        translator = str.maketrans('', '', string.punctuation)
        theme_without_punct = theme.translate(translator)
        words_in_theme = word_tokenize(theme_without_punct)
        lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_theme if word.lower() not in stop_words}
        if not lemma_theme:
            # Guard: an empty lemma set caused ZeroDivisionError below.
            return answer(False, "Не пройдена! Не удалось выделить слова темы из заголовков.")

        # Body text of every slide except the title slide.
        text_from_slide = [slide for page, slide in enumerate(self.file.get_text_from_slides(), 1) if page > 1]
        string_from_text = ''.join(text_from_slide)

        text_without_punct = string_from_text.translate(translator)
        words_in_text = word_tokenize(text_without_punct)

        lemma_text = {MORPH_ANALYZER.parse(word)[0].normal_form for word in words_in_text if word.lower() not in stop_words}

        # Integer percentage of theme lemmas that occur anywhere in the slides
        # (round() around // was redundant: // already yields an int).
        value_intersection = len(lemma_theme.intersection(lemma_text)) * 100 // len(lemma_theme)

        if value_intersection == 0:
            # Fixed typo in the user-facing message: "завяленные" -> "заявленные".
            return answer(False, "Не пройдена! В презентации не упоминаются слова, заявленные в теме.")
        elif value_intersection < self.limit:
            return answer(
                round(value_intersection / self.limit, 1),
                f"Частично пройдена! Процент упоминания темы в вашей презентации ({value_intersection} %) ниже требуемого ({self.limit} %)."
            )
        else:
            return answer(True, f'Пройдена! Процент упоминания темы в презентации: {value_intersection} %')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import re | ||
import string | ||
|
||
from ..base_check import BaseReportCriterion, answer | ||
|
||
import string | ||
from nltk.tokenize import word_tokenize, sent_tokenize | ||
from nltk.corpus import stopwords | ||
from pymorphy2 import MorphAnalyzer | ||
|
||
|
||
MORPH_ANALYZER = MorphAnalyzer() | ||
|
||
|
||
class FindThemeInReport(BaseReportCriterion):
    """Check that the report theme (taken from the title page) is mentioned in the body."""

    label = "Проверка упоминания темы в отчете"
    description = "Проверка упоминания темы в отчете"
    id = 'theme_in_report_check'

    def __init__(self, file_info, limit=40):
        """
        :param file_info: report descriptor handed to the base criterion
        :param limit: minimal mention percentage required to fully pass
        """
        super().__init__(file_info)
        self.intro = {}          # last content chapter processed
        self.chapters = []       # filled lazily by late_init()
        self.text_par = []       # lower-cased paragraphs of content chapters
        self.full_text = set()   # lemmas of the whole report body
        self.limit = limit

    def late_init(self):
        # Chapter structure depends on the detected report type, so build lazily.
        self.chapters = self.file.make_chapters(self.file_type['report_type'])

    def check(self):
        """Return an ``answer`` with the percentage of theme lemmas found in the report."""
        stop_words = set(stopwords.words("russian"))
        if self.file.page_counter() < 4:
            return answer(False, "В отчете недостаточно страниц. Нечего проверять.")

        self.late_init()
        # Collect paragraphs from content chapters; skip service sections.
        for chapter in self.chapters:
            header = chapter["text"].lower()
            if header not in ['заключение', "введение", "список использованных источников", "условные обозначения"]:
                self.intro = chapter
                for intro_par in self.intro['child']:
                    self.text_par.append(intro_par['text'].lower())

        lemma_theme = self.find_theme()
        if not lemma_theme:
            # Guard: empty theme set caused ZeroDivisionError below
            # (and a list return from find_theme broke .intersection()).
            return answer(False, "Не пройдена! Не удалось выделить тему отчета с титульного листа.")

        # Hoisted out of the loop: the translation table is loop-invariant.
        translator = str.maketrans('', '', string.punctuation)
        for text in self.text_par:
            text_without_punct = text.translate(translator)
            word_in_text = word_tokenize(text_without_punct)
            lemma_text = {MORPH_ANALYZER.parse(w)[0].normal_form for w in word_in_text if w.lower() not in stop_words}
            self.full_text.update(lemma_text)

        # Integer percentage (round() around // was redundant).
        intersection = lemma_theme.intersection(self.full_text)
        value_intersection = len(intersection) * 100 // len(lemma_theme)
        if value_intersection == 0:
            return answer(False, "Не пройдена! В отчете не упоминаются слова, заявленные в теме отчета.")
        elif value_intersection < self.limit:
            return answer(
                round(value_intersection / self.limit, 1),
                f"Частично пройдена! Процент упоминания темы в вашем отчете ({value_intersection} %) ниже требуемого ({self.limit} %)."
            )
        else:
            return answer(True, f'Пройдена! Процент упоминания темы в отчете: {value_intersection} %.')

    def find_theme(self):
        """Extract theme lemmas from the title page: the words between 'тема' and 'студент'.

        Returns a (possibly empty) set. The original raised ValueError when a
        marker word was absent and returned a *list* when page 1 was missing.
        """
        stop_words = set(stopwords.words("russian"))
        lemma_theme = set()  # set, not list: callers use set operations on the result
        for page_num, text_on_page in self.file.pdf_file.get_text_on_page().items():
            if page_num != 1:
                continue
            lower_text = text_on_page.lower()
            text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation))
            list_full = text_without_punct.split()
            # Guard against missing marker words (original raised ValueError here).
            if 'тема' not in list_full or 'студент' not in list_full:
                continue
            start = list_full.index('тема') + 1
            end = list_full.index('студент')
            list_theme = list_full[start:end]
            lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme
                           if word not in stop_words}
        return lemma_theme
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.