diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0ff659da..0be14874 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Build system images (non-pulling) run: | @@ -16,10 +16,10 @@ jobs: - name: Build docker-compose run: | cp .env_example .env - docker-compose build + docker compose build - name: Run docker-compose run: | - docker-compose up -d + docker compose up -d sleep 10 - name: Run tests run: | diff --git a/.github/workflows/selenium_tests.yml b/.github/workflows/selenium_tests.yml index a31e1253..aa850300 100644 --- a/.github/workflows/selenium_tests.yml +++ b/.github/workflows/selenium_tests.yml @@ -7,17 +7,17 @@ jobs: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - name: Build docker-compose with docker-compose-selenium (tests) + - name: Build docker compose with docker-compose-selenium (tests) run: | cp .env_example .env cp app/VERSION_example.json app/VERSION.json - docker-compose -f docker-compose.yml -f docker-compose-selenium.yml build + docker compose -f docker-compose.yml -f docker-compose-selenium.yml build - name: Run docker-compose with docker-compose-selenium (tests) run: | - docker-compose -f docker-compose.yml -f docker-compose-selenium.yml up -d + docker compose -f docker-compose.yml -f docker-compose-selenium.yml up -d chmod +x tests/scripts/docker_check_tests.sh ./tests/scripts/docker_check_tests.sh diff --git a/README.md b/README.md index 8abfbaa9..8aa9524e 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ SIGNUP_PAGE_ENABLED=... CONSUMER_KEY=... CONSUMER_SECRET=... +ACCESS_TOKEN=... 
``` ## Run diff --git a/app/db/db_methods.py b/app/db/db_methods.py index ad903e95..d80d92db 100644 --- a/app/db/db_methods.py +++ b/app/db/db_methods.py @@ -59,6 +59,8 @@ def get_user(username): else: return None +def get_all_users(): + return users_collection.find() # Returns True if user was found and updated and false if not (username can not be changed!) def edit_user(user): @@ -306,6 +308,21 @@ def get_logs_cursor(filter={}, limit=10, offset=0, sort=None, order=None): return rows, count +def get_user_cursor(filter={}, limit=10, offset=0, sort=None, order=None): + sort = 'username' if sort == 'username' else sort + + count = users_collection.count_documents(filter) + rows = users_collection.find(filter) + + if sort and order in ("asc, desc"): + rows = rows.sort(sort, pymongo.ASCENDING if order == + "asc" else pymongo.DESCENDING) + + rows = rows.skip(offset) if offset else rows + rows = rows.limit(limit) if limit else rows + + return rows, count + # Get stats for one user, return a list in the form # [check_id, login, time of check_id's creation, result(0/1)] diff --git a/app/main/check_packs/base_criterion_pack.py b/app/main/check_packs/base_criterion_pack.py index c478fd6b..9ab687c2 100644 --- a/app/main/check_packs/base_criterion_pack.py +++ b/app/main/check_packs/base_criterion_pack.py @@ -26,8 +26,9 @@ def check(self): try: criterion_check_result = criterion.check() except Exception as e: - logger.error(f'{criterion.id}: oшибка во время проверки: {e}') - criterion_check_result = {'score': 0, 'verdict': [UNEXPECTED_CHECK_FAIL_MSG]} + err_msg = f'{criterion.id}: oшибка во время проверки: {e}' + logger.error(err_msg) + criterion_check_result = {'score': 0, 'verdict': [UNEXPECTED_CHECK_FAIL_MSG, f"Информация об ошибке для администратора: {err_msg}"]} if criterion.priority and not criterion_check_result['score']: failed_priority_check = True criterion_check_result['verdict'] = [PRIORITY_CHECK_FAILED_MSG] + list(criterion_check_result['verdict']) diff --git 
a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 598c3cc2..c053ce0a 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -18,6 +18,7 @@ ['pres_empty_slide'], ['theme_in_pres_check'], ['verify_git_link'], + ['pres_image_capture'], ] BASE_REPORT_CRITERION = [ ["simple_check"], @@ -43,6 +44,8 @@ ["spelling_check"], ["max_abstract_size_check"], ["theme_in_report_check"], + ['key_words_report_check'], + ["empty_task_page_check"], ] DEFAULT_TYPE = 'pres' diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py index d605c1d3..cb1f3c25 100644 --- a/app/main/checks/presentation_checks/__init__.py +++ b/app/main/checks/presentation_checks/__init__.py @@ -13,3 +13,4 @@ from .find_theme_in_pres import FindThemeInPres from .verify_git_link import PresVerifyGitLinkCheck from .empty_slide_check import PresEmptySlideCheck +from .name_of_image_check import PresImageCaptureCheck diff --git a/app/main/checks/presentation_checks/find_def_sld.py b/app/main/checks/presentation_checks/find_def_sld.py index 5b363eae..a40518c9 100644 --- a/app/main/checks/presentation_checks/find_def_sld.py +++ b/app/main/checks/presentation_checks/find_def_sld.py @@ -12,18 +12,19 @@ def __init__(self, file_info, key_slide): self.found_idxs = [] def check(self): - for i, title in enumerate(self.file.get_titles(), 1): - if str(title).lower().find(str(self.type_of_slide).lower()) != -1: - #found_slides.append(self.file.get_text_from_slides()[i - 1]) - self.found_idxs.append(i) + if self.file is not None: + for i, title in enumerate(self.file.get_titles(), 1): + if str(title).lower().find(str(self.type_of_slide).lower()) != -1: + #found_slides.append(self.file.get_text_from_slides()[i - 1]) + self.found_idxs.append(i) # save fot future - self.file.found_index[str(self.type_of_slide)] = self.found_idxs.copy() + self.file.found_index[str(self.type_of_slide)] = self.found_idxs.copy() - 
if self.found_idxs: - return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, self.format_page_link(self.found_idxs))))) - else: - return answer(False, 'Слайд не найден') + if self.found_idxs: + return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, self.format_page_link(self.found_idxs))))) + else: + return answer(False, 'Слайд не найден') @property def name(self): diff --git a/app/main/checks/presentation_checks/name_of_image_check.py b/app/main/checks/presentation_checks/name_of_image_check.py new file mode 100644 index 00000000..faffdf17 --- /dev/null +++ b/app/main/checks/presentation_checks/name_of_image_check.py @@ -0,0 +1,40 @@ +from ..base_check import BasePresCriterion, answer +from utils import name_of_image_check_results + +class PresImageCaptureCheck(BasePresCriterion): + label = "Проверка наличия подписи к рисункам" + description = 'Подписи к рисункам должны содержать слово "Рисунок". Подпись к рисункам на слайдах без текста необязательна' + id = 'pres_image_capture' + + def __init__(self, file_info): + super().__init__(file_info) + + def check(self): + slides_without_capture = set() + slide_with_image_only = set() + result_str = 'Не пройдена! ' + all_captions = [] + for num, slide in enumerate(self.file.slides, 1): + captions = slide.get_captions() + if captions: + for caption in captions: + body_text = slide.get_text().replace(captions[0][0], '').replace(slide.get_title(), '').replace('', '').replace(' ', '') + if body_text.strip() or slide.get_table(): + all_captions.append(caption[0]) + if 'Рисунок' not in caption[0]: + slides_without_capture.add(num) + else: + if caption[0] != slide.get_title(): + slide_with_image_only.add(num) + if slides_without_capture: + result_str += ( + 'Подписи к рисункам на следующих слайдах отсутствуют или не содержат слова "Рисунок": {}'.format( + ', '.join(self.format_page_link(sorted(slides_without_capture)))) + '
') + if slide_with_image_only: + result_str += ( + 'Подписи к рисункам на следующих слайдах без текста необязательны: {}'.format( + ', '.join(self.format_page_link(sorted(slide_with_image_only)))) + '
') + if result_str: + return answer(False, name_of_image_check_results(result_str, all_captions)) + else: + return answer(True, 'Пройдена!') diff --git a/app/main/checks/presentation_checks/sld_similarity.py b/app/main/checks/presentation_checks/sld_similarity.py index 2d9d1db1..de8917ea 100644 --- a/app/main/checks/presentation_checks/sld_similarity.py +++ b/app/main/checks/presentation_checks/sld_similarity.py @@ -1,6 +1,6 @@ -from nlp.similarity_of_texts import check_similarity from utils import get_text_from_slides, tasks_conclusions_feedback - +from app.nlp.similarity_of_texts import check_similarity +from app.nlp.stemming import Stemming from ..base_check import BasePresCriterion, answer @@ -18,8 +18,6 @@ def __init__(self, file_info, goals='Цель и задачи', conclusion='За def check(self): goals = get_text_from_slides(self.file, self.goals) conclusions = get_text_from_slides(self.file, self.conclusion) - if goals == "" or conclusions == "": - return answer(False, 'Задач или заключения не существует') results = check_similarity(goals, conclusions) diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py index 50729335..50972ce3 100644 --- a/app/main/checks/report_checks/__init__.py +++ b/app/main/checks/report_checks/__init__.py @@ -23,4 +23,6 @@ from .style_check import ReportStyleCheck from .spelling_check import SpellingCheck from .max_abstract_size_check import ReportMaxSizeOfAbstractCheck -from .template_name import ReportTemplateNameCheck \ No newline at end of file +from .template_name import ReportTemplateNameCheck +from .key_words_check import KeyWordsReportCheck +from .empty_task_page_check import EmptyTaskPageCheck diff --git a/app/main/checks/report_checks/banned_words_check.py b/app/main/checks/report_checks/banned_words_check.py index e88dedab..351b403e 100644 --- a/app/main/checks/report_checks/banned_words_check.py +++ b/app/main/checks/report_checks/banned_words_check.py @@ -1,5 +1,5 @@ import re - 
+from .style_check_settings import StyleCheckSettings from ..base_check import BaseReportCriterion, answer, morph @@ -8,15 +8,32 @@ class ReportBannedWordsCheck(BaseReportCriterion): description = 'Запрещено упоминание слова "мы"' id = 'banned_words_check' - def __init__(self, file_info, words=["мы"], min_count=3, max_count=6): + def __init__(self, file_info, headers_map=None): super().__init__(file_info) - self.words = [morph.normal_forms(word)[0] for word in words] - self.min_count = min_count - self.max_count = max_count + self.words = [] + self.min_count = 0 + self.max_count = 0 + if headers_map: + self.config = headers_map + else: + self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS' + + def late_init(self): + self.headers_main = self.file.get_main_headers(self.file_type['report_type']) + if self.headers_main in StyleCheckSettings.CONFIGS.get(self.config): + self.words = [morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['banned_words']] + self.min_count = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['min_count_for_banned_words_check'] + self.max_count = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['max_count_for_banned_words_check'] + else: + if 'any_header' in StyleCheckSettings.CONFIGS.get(self.config): + self.words = [morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)['any_header']['banned_words']] + self.min_count = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['min_count_for_banned_words_check'] + self.max_count = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['max_count_for_banned_words_check'] def check(self): if self.file.page_counter() < 4: return answer(False, "В отчете недостаточно страниц. Нечего проверять.") + self.late_init() detected_lines = {} result_str = f'Запрещенные слова: {"; ".join(self.words)}
' count = 0 diff --git a/app/main/checks/report_checks/chapters.py b/app/main/checks/report_checks/chapters.py index ad181135..e822d7fc 100644 --- a/app/main/checks/report_checks/chapters.py +++ b/app/main/checks/report_checks/chapters.py @@ -16,13 +16,13 @@ def __init__(self, file_info): self.target_styles = StyleCheckSettings.VKR_CONFIG self.target_styles = list(map(lambda elem: { "style": self.construct_style_from_description(elem["style"]) - }, self.target_styles)) + }, self.target_styles.values())) self.docx_styles = {} self.style_regex = {} self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS' self.presets = StyleCheckSettings.CONFIGS.get(self.config) level = 0 - for format_description in self.presets: + for _, format_description in self.presets.items(): self.docx_styles.update({level: format_description["docx_style"]}) pattern = re.compile(format_description["regex"]) self.style_regex.update({level: pattern}) diff --git a/app/main/checks/report_checks/empty_task_page_check.py b/app/main/checks/report_checks/empty_task_page_check.py new file mode 100644 index 00000000..229c7808 --- /dev/null +++ b/app/main/checks/report_checks/empty_task_page_check.py @@ -0,0 +1,45 @@ +import re +from ..base_check import BaseReportCriterion, answer + +PAGE_NAME = "ЗАДАНИЕ НА ВЫПУСКНУЮ КВАЛИФИКАЦИОННУЮ РАБОТУ" + + +class EmptyTaskPageCheck(BaseReportCriterion): + label = "Проверка на пустоту страницы с заданием" + description = f'Страница "{PAGE_NAME}" должна содержать текст' + id = 'empty_task_page_check' + + def __init__(self, file_info): + super().__init__(file_info) + self.check_words = {'студент', 'руководитель', 'тема работы'} + self.check_first_pattern = r'^студент+[а-яА-ЯёЁa-zA-Z]+группа\d+$' + self.check_date_pattern = r'^«\d+»[а-яА-ЯёЁa-zA-Z]+20\d+г«\d+»[а-яА-ЯёЁa-zA-Z]+20\d+г$' + self.result = {'Cтудент, Группа', 'Дата выдачи задания, Дата представления ВКР к защите', 'Студент', 'Руководитель', 'Тема работы'} + + def check(self): 
+ if self.file.page_counter() < 4: + return answer(False, "В отчете недостаточно страниц. Нечего проверять.") + rows_text = self.file.pdf_file.page_rows_text(1) + if 'ЗАДАНИЕ' not in rows_text[0][4]: + return answer(False, f'Страница "{PAGE_NAME}" не найдена. Убедитесь, что она является второй в документе и не содержит ошибок в заголовке.') + elif len(rows_text) < 4: + return answer(False, f'Страница "{PAGE_NAME}" не содержит текста.') + else: + start_string = 0 + for row in rows_text: + row_string = row[4].replace('\n', '').replace('.', '').replace(' ', '').replace('_', '').lower() + if re.match(self.check_first_pattern, row_string): + self.result.discard('Cтудент, Группа') + start_string = row[5] + if re.match(self.check_date_pattern, row_string): + self.result.discard('Дата выдачи задания, Дата представления ВКР к защите') + for k in self.check_words: + for row in rows_text[start_string+1:]: + row_string = row[4].replace('\n', '').replace(' ', '').replace('_', '').lower() + if k.replace(' ', '') in row_string: + if len(row_string) > (len(k)+2): + self.result.discard(k.capitalize()) + if not self.result: + return answer(True, 'Пройдена!') + else: + return answer(False, f'Некоторые необходимые поля пусты или отсутствуют. 
Проверьте поля: «{"», «".join(self.result)}»') diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py index 56dd9a00..9602e44f 100644 --- a/app/main/checks/report_checks/find_theme_in_report.py +++ b/app/main/checks/report_checks/find_theme_in_report.py @@ -69,9 +69,13 @@ def find_theme(self): if key == 1: lower_text = text_on_page.lower() text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation)) - list_full = text_without_punct.split() - start = list_full.index('тема') + 1 - end = list_full.index('студент') + list_full = tuple(text_without_punct.split()) + start, end = 0, len(list_full) + for index, value in enumerate(list_full): + if value == "тема": + start = index + 1 + elif value in {"студент", "студентка"}: + end = index list_theme = list_full[start:end] lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if word not in stop_words} diff --git a/app/main/checks/report_checks/image_references.py b/app/main/checks/report_checks/image_references.py index 72a1a284..f9f68e6b 100644 --- a/app/main/checks/report_checks/image_references.py +++ b/app/main/checks/report_checks/image_references.py @@ -26,12 +26,18 @@ def check(self): if not len(self.headers): return answer(False, "Не найдено ни одного заголовка.

Проверьте корректность использования стилей.") number_of_images, all_numbers = self.count_images_vkr() - if not number_of_images: + count_file_image_object = self.file.pdf_file.get_image_num() + if count_file_image_object and not number_of_images: + return answer(False, f'В отчёте найдено {count_file_image_object} рисунков, но не найдено ни одной подписи рисунка.

Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был ' + f'использован стиль {self.image_style}, и формат: ' + f'"Рисунок <Номер рисунка> — <Название рисунка>".') + elif not number_of_images: return answer(True, f'Не найдено ни одного рисунка.

Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был ' f'использован стиль {self.image_style}, и формат: ' - f'"Рисунок <Номер рисунка> -- <Название рисунка>".') + f'"Рисунок <Номер рисунка> — <Название рисунка>".') else: return answer(False, 'Во время обработки произошла критическая ошибка') + references = self.search_references() if len(references.symmetric_difference(all_numbers)) == 0: return answer(True, f"Пройдена!") diff --git a/app/main/checks/report_checks/key_words_check.py b/app/main/checks/report_checks/key_words_check.py new file mode 100644 index 00000000..9a403613 --- /dev/null +++ b/app/main/checks/report_checks/key_words_check.py @@ -0,0 +1,66 @@ +import re +import string + +from nltk.tokenize import word_tokenize +from nltk.corpus import stopwords +from pymorphy2 import MorphAnalyzer +from ..base_check import BaseReportCriterion, answer + + +MORPH_ANALYZER = MorphAnalyzer() + +class KeyWordsReportCheck(BaseReportCriterion): + label = 'Проверка наличия раздела "Ключевые слова" и упоминание их в тексте' + description = 'Раздел идет сразу после названия работы и содержит не менее трех ключевых слов. Слова упоминаются в тексте' + id = 'key_words_report_check' + + def __init__(self, file_info, min_key_words = 3): + super().__init__(file_info) + self.min_key_words = min_key_words + self.chapters = [] + self.text_par = [] + self.lemme_list = [] + + def late_init(self): + self.chapters = self.file.make_chapters(self.file_type['report_type']) + + def check(self): + key_words_chapter = self.file.paragraphs[1].lower() + if 'ключевые слова' not in key_words_chapter: + return answer(False, 'Раздел "Ключевые слова" не найден') + cleaned_str = re.sub(r'<[^>]*>', '', key_words_chapter) + final_str = cleaned_str.replace('ключевые слова', '').replace(':','') + key_words_result = [word.strip() for word in final_str.split(',')] + if len(key_words_result) < self.min_key_words: + return answer(False, f'Не пройдена! 
Количество ключевых слов должно быть не менее {self.min_key_words}') + stop_words = set(stopwords.words("russian")) + if self.file.page_counter() < 4: + return answer(False, "В отчете недостаточно страниц. Нечего проверять.") + self.late_init() + for intro in self.chapters: + header = intro["text"].lower() + if header not in ['аннотация', "ключевые слова"]: + self.intro = intro + for intro_par in self.intro['child']: + par = intro_par['text'].lower() + self.text_par.append(par) + for phrase in key_words_result: + words = word_tokenize(phrase) + words_lemma = [MORPH_ANALYZER.parse(w)[0].normal_form for w in words if w.lower() not in stop_words] + phrase_lemma = ' '.join(words_lemma) + self.lemme_list.append(phrase) + for text in self.text_par: + cleaned_text = re.sub(r'<[^>]*>', '', text) + translator = str.maketrans('', '', string.punctuation) + text_without_punct = cleaned_text.translate(translator) + word_in_text = word_tokenize(text_without_punct) + lemma_text = [MORPH_ANALYZER.parse(w)[0].normal_form for w in word_in_text if w.lower() not in stop_words] + lemma_text_str = ' '.join(lemma_text) + if phrase_lemma in lemma_text_str: + del self.lemme_list[-1] + break + + if self.lemme_list: + return answer(False, f"Не пройдена! 
В тексте не найдены следующие ключевые слова: «{'», «'.join(self.lemme_list)}»") + else: + return answer(True, f'Пройдена!') diff --git a/app/main/checks/report_checks/literature_references.py b/app/main/checks/report_checks/literature_references.py index e271ba4c..32be66ed 100644 --- a/app/main/checks/report_checks/literature_references.py +++ b/app/main/checks/report_checks/literature_references.py @@ -1,5 +1,5 @@ import re - +from .style_check_settings import StyleCheckSettings from ..base_check import BaseReportCriterion, answer @@ -8,17 +8,26 @@ class ReferencesToLiteratureCheck(BaseReportCriterion): description = '' id = 'literature_references' - def __init__(self, file_info, min_ref=1, max_ref=1000): + def __init__(self, file_info, min_ref=1, max_ref=1000, headers_map=None): super().__init__(file_info) self.headers = [] self.literature_header = [] self.name_pattern = r'список[ \t]*(использованных|использованной|)[ \t]*(источников|литературы)' - self.min_ref = min_ref - self.max_ref = max_ref + if headers_map: + self.config = headers_map + else: + self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS' def late_init_vkr(self): self.headers = self.file.make_chapters(self.file_type['report_type']) - self.literature_header = self.file.find_literature_vkr(self.file_type['report_type']) + self.headers_main = self.file.get_main_headers(self.file_type['report_type']) + if self.headers_main in StyleCheckSettings.CONFIGS.get(self.config): + self.min_ref = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['min_ref_for_literature_references_check'] + self.max_ref = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['mах_ref_for_literature_references_check'] + else: + if 'any_header' in StyleCheckSettings.CONFIGS.get(self.config): + self.min_ref = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['min_ref_for_literature_references_check'] + self.max_ref = 
StyleCheckSettings.CONFIGS.get(self.config)['any_header']['mах_ref_for_literature_references_check'] def check(self): if self.file.page_counter() < 4: @@ -45,13 +54,16 @@ def check(self): if not number_of_sources: return answer(False, f'В Списке использованных источников не найдено ни одного источника.

Проверьте корректность использования нумированного списка.') - references = self.search_references(start_literature_par) + references, ref_sequence = self.search_references(start_literature_par) all_numbers = set() for i in range(1, number_of_sources + 1): all_numbers.add(i) if len(references.symmetric_difference(all_numbers)) == 0: if not self.min_ref <= number_of_sources <= self.max_ref: - return answer(False, f'Список источников оформлен верно, однако их количество ({number_of_sources}) не удовлетворяет необходимому критерию.
Количество источников должно быть от {self.min_ref} до {self.max_ref}.') + return answer(False, f'Список источников оформлен верно, однако их количество ({number_of_sources}) не удовлетворяет необходимому критерию.
Количество источников должно быть не менее {self.min_ref}.') + elif ref_sequence: + result_str += f"Источники должны нумероваться в порядке упоминания в тексте. Неправильные последовательности: {'; '.join(num for num in ref_sequence)}" + return answer(False, result_str) else: return answer(True, f"Пройдена!") elif len(references.difference(all_numbers)): @@ -71,11 +83,14 @@ def check(self):
  • Убедитесь, что для ссылки на источник используются квадратные скобки;
  • Убедитесь, что для оформления списка литературы был использован нумированный список;
  • Убедитесь, что после и перед нумированным списком отсутствуют непустые абзацы.
  • +
  • Убедитесь, что один источник не разбит на две строки клавишей "Enter".
  • ''' return answer(False, result_str) def search_references(self, start_par): + prev_ref = 0 + ref_sequence = [] array_of_references = set() for i in range(0, start_par): if isinstance(self.file.paragraphs[i], str): @@ -88,10 +103,24 @@ def search_references(self, start_par): if re.match(r'\d+[ \-]+\d+', one_part): start, end = re.split(r'[ -]+', one_part) for k in range(int(start), int(end) + 1): - array_of_references.add(k) + prev_ref = self.add_references(k, prev_ref, array_of_references, ref_sequence) elif one_part != '': - array_of_references.add(int(one_part)) - return array_of_references + prev_ref = self.add_references(int(one_part), prev_ref, array_of_references, ref_sequence) + if ref_sequence: + if ref_sequence[0][1] == '0': + ref_sequence[0] = ref_sequence[0].replace('[0],', '') + return array_of_references, ref_sequence + + def add_references(self, k, prev_ref, array_of_references, ref_sequence): + if k not in array_of_references: + if k - prev_ref != 1: + ref_sequence.append(f'[{prev_ref}], [{k}]') + prev_ref = k + else: + if k - prev_ref == 1: + prev_ref = k + array_of_references.add(k) + return prev_ref def find_start_paragraph(self): start_index = 0 diff --git a/app/main/checks/report_checks/needed_headers_check.py b/app/main/checks/report_checks/needed_headers_check.py index 14223ebe..64c815ef 100644 --- a/app/main/checks/report_checks/needed_headers_check.py +++ b/app/main/checks/report_checks/needed_headers_check.py @@ -18,23 +18,25 @@ def __init__(self, file_info, main_heading_style="heading 2", headers_map=None): self.config = headers_map else: self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS' - self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[0]["headers"] + # self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[0]["headers"] def late_init(self): self.headers = self.file.make_chapters(self.file_type['report_type']) self.headers_page = 
self.file.find_header_page(self.file_type['report_type']) self.chapters_str = self.file.show_chapters(self.file_type['report_type']) - # TODO: change self.headers_main = self.file.get_main_headers(self.file_type['report_type']) - if self.headers_main == "Задание 1": - self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[0]["headers"] - elif self.headers_main == "Задание 2": - self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[1]["headers"] + if self.headers_main in StyleCheckSettings.CONFIGS.get(self.config): + self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['headers'] + else: + if 'any_header' in StyleCheckSettings.CONFIGS.get(self.config): + self.patterns = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['headers'] def check(self): if self.file.page_counter() < 4: return answer(False, "В отчете недостаточно страниц. Нечего проверять.") self.late_init() + if not self.patterns: + return answer(False, "Не удалось сформировать требуемые заголовки исходя из названия работы. Проверьте наименование работы.") result_string = '' patterns = [] for pattern in self.patterns: @@ -55,7 +57,7 @@ def check(self): if not result_string: result_str = f'Все необходимые заголовки обнаружены!' result_str += f'

    Ниже представлена иерархия обработанных заголовков, ' \ - f'сравните с Содержанием {self.format_page_link([self.headers_page])}:' + f'сравните с Содержанием {self.format_page_link([self.headers_page])}:' result_str += self.chapters_str result_str += '
    Если список не точный, убедитесь, что для каждого заголовка указан верный стиль.' return answer(True, result_str) @@ -70,7 +72,7 @@ def check(self): ''' result_str += f'

    Ниже представлена иерархия обработанных заголовков, ' \ - f'сравните с Содержанием {self.format_page_link([self.headers_page])}:' + f'сравните с Содержанием {self.format_page_link([self.headers_page])}:' result_str += self.chapters_str result_str += '
    Если список не точный, убедитесь, что для каждого заголовка указан верный стиль.' return answer(False, result_str) diff --git a/app/main/checks/report_checks/section_component.py b/app/main/checks/report_checks/section_component.py index ac049e15..68781533 100644 --- a/app/main/checks/report_checks/section_component.py +++ b/app/main/checks/report_checks/section_component.py @@ -11,7 +11,8 @@ def __init__(self, file_info, chapter='Введение', patterns=('цель', super().__init__(file_info) self.intro = {} if headers_map: - self.chapter = StyleCheckSettings.CONFIGS.get(headers_map)[0]["headers"][0] + self.config = headers_map + self.chapter = '' patterns = ('цель', 'задач') else: self.chapter = chapter @@ -21,12 +22,18 @@ def __init__(self, file_info, chapter='Введение', patterns=('цель', self.patterns.append({"name": pattern.capitalize(), "text": pattern, "marker": 0}) def late_init(self): + if not self.chapter: + self.headers_main = self.file.get_main_headers(self.file_type['report_type']) + if self.headers_main in StyleCheckSettings.CONFIGS.get(self.config): + self.chapter = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]["header_for_report_section_component"] self.chapters = self.file.make_chapters(self.file_type['report_type']) def check(self): if self.file.page_counter() < 4: return answer(False, "В отчете недостаточно страниц. 
Нечего проверять.") self.late_init() + if not self.chapter: + return answer(True, f'Данная проверка не предусмотрена для работы с темой "{self.headers_main}"') result_str = '' for intro in self.chapters: header = intro["text"].lower() diff --git a/app/main/checks/report_checks/sections_check.py b/app/main/checks/report_checks/sections_check.py index 887eabb2..8fc03139 100644 --- a/app/main/checks/report_checks/sections_check.py +++ b/app/main/checks/report_checks/sections_check.py @@ -32,7 +32,7 @@ def check(self): self.file.parse_effective_styles() result = True result_str = "" - for preset in self.presets: + for _, preset in self.presets.items(): full_style = self.construct_style_from_description(preset["style"]) precheck_dict = {key: preset["style"].get(key) for key in self.prechecked_props} precheck_style = self.construct_style_from_description(precheck_dict) diff --git a/app/main/checks/report_checks/short_sections_check.py b/app/main/checks/report_checks/short_sections_check.py index df9d3360..53729cfb 100644 --- a/app/main/checks/report_checks/short_sections_check.py +++ b/app/main/checks/report_checks/short_sections_check.py @@ -25,19 +25,18 @@ def __init__(self, file_info, min_section_count=5, min_section_len=20, main_head if prechecked_props_lst is None: prechecked_props_lst = StyleCheckSettings.PRECHECKED_PROPS self.styles: List[Style] = [] - for format_description in self.presets: + for _, format_description in self.presets.items(): prechecked_dict = {key: format_description["style"].get(key) for key in prechecked_props_lst} style = Style() style.__dict__.update(prechecked_dict) self.styles.append(style) - def late_init(self): self.file.parse_effective_styles() try: self.cutoff_line = self.file.pdf_file.get_text_on_page()[2].split("\n")[0] except: self.cutoff_line = None - for preset in self.presets: + for _, preset in self.presets.items(): if preset["unify_regex"] is not None: self.file.unify_multiline_entities(preset["unify_regex"]) diff --git 
a/app/main/checks/report_checks/style_check.py b/app/main/checks/report_checks/style_check.py index 94e06453..e421a828 100644 --- a/app/main/checks/report_checks/style_check.py +++ b/app/main/checks/report_checks/style_check.py @@ -25,7 +25,7 @@ def __init__(self, file_info, header_styles=None, target_styles=None, key_proper self.target_styles)) if header_styles is None: self.header_styles = [] - for style_dict in StyleCheckSettings.LR_CONFIG: + for _, style_dict in StyleCheckSettings.LR_CONFIG.items(): header_style = {key: style_dict["style"].get(key) for key in StyleCheckSettings.PRECHECKED_PROPS} style = Style() style.__dict__.update(header_style) diff --git a/app/main/checks/report_checks/style_check_settings.py b/app/main/checks/report_checks/style_check_settings.py index 4392bfdc..88e1a7ad 100644 --- a/app/main/checks/report_checks/style_check_settings.py +++ b/app/main/checks/report_checks/style_check_settings.py @@ -10,6 +10,9 @@ class StyleCheckSettings: HEADER_REGEX = "^\\D+.+$" HEADER_1_REGEX = "^()([\\w\\s]+)$" HEADER_2_REGEX = "^()([\\w\\s]+)\\.$" + STD_BANNED_WORDS = ['мы'] + STD_MIN_LIT_REF = 1 + STD_MAX_LIT_REF = 1000 #just in case for future edit HEADER_1_STYLE = { "bold": True, "italic": False, @@ -82,41 +85,76 @@ class StyleCheckSettings: } # Order of styles may be significant! First level 1, then level 2 and so on. 
- LR_CONFIG = [ + LR_CONFIG = { + 'any_header': { "style": HEADER_1_STYLE, "docx_style": ["heading 1"], "headers": ["Исходный код программы"], "unify_regex": APPENDIX_UNIFY_REGEX, "regex": APPENDIX_REGEX, + "banned_words": STD_BANNED_WORDS, + 'min_count_for_banned_words_check': 3, + 'max_count_for_banned_words_check': 6, + 'min_ref_for_literature_references_check': STD_MIN_LIT_REF, + 'mах_ref_for_literature_references_check': STD_MAX_LIT_REF }, + 'second_header': { "style": HEADER_2_STYLE, "docx_style": ["heading 2"], "headers": ["Цель работы", "Выполнение работы", "Выводы"], "unify_regex": None, - "regex": HEADER_1_REGEX + "regex": HEADER_1_REGEX, } - ] + } - VKR_CONFIG = [ + VKR_CONFIG = { + 'any_header': { "style": HEADER_1_STYLE, "docx_style": ["heading 2"], "headers": ["ВВЕДЕНИЕ", "ЗАКЛЮЧЕНИЕ", "СПИСОК ИСПОЛЬЗОВАННЫХ ИСТОЧНИКОВ"], "unify_regex": None, - "regex": HEADER_REGEX + "regex": HEADER_REGEX, + "banned_words": STD_BANNED_WORDS, + 'min_count_for_banned_words_check': 3, + 'max_count_for_banned_words_check': 6, + 'min_ref_for_literature_references_check': STD_MIN_LIT_REF, + 'mах_ref_for_literature_references_check': STD_MAX_LIT_REF }, + 'second_header': { "style": HEADER_1_NUM_STYLE, "docx_style": ["heading 2", "heading 3", "heading 4"], "headers": [], "unify_regex": None, + "regex": HEADER_NUM_REGEX, + } + } + + NIR_CONFIG = { + 'any_header': + { + "style": HEADER_1_STYLE, + "docx_style": ["heading 2"], + "headers": ["ПОСТАНОВКА ЗАДАЧИ", "РЕЗУЛЬТАТЫ РАБОТЫ В ВЕСЕННЕМ СЕМЕСТРЕ", "ОПИСАНИЕ ПРЕДПОЛАГАЕМОГО МЕТОДА РЕШЕНИЯ", + "ПЛАН РАБОТЫ НА ОСЕННИЙ СЕМЕСТР", "СПИСОК ИСПОЛЬЗОВАННЫХ ИСТОЧНИКОВ"], + "unify_regex": None, + "regex": HEADER_REGEX, + }, + 'second_header': + { + "style": HEADER_1_NUM_STYLE, + "docx_style": ["heading 3", "heading 4"], + "headers": ["ПЛАН", "РЕЗУЛЬТАТЫ"], + "unify_regex": None, "regex": HEADER_NUM_REGEX } - ] + } - MD_CONFIG = [ + MD_CONFIG = { + 'Задание 1': { "style": HEADER_1_STYLE, "docx_style": ["heading 2"], @@ -125,9 +163,14 @@ 
class StyleCheckSettings: "Методы обоснования", "Статья", ], + "header_for_report_section_component": "Поставленная цель и задачи", "unify_regex": None, - "regex": HEADER_REGEX + "regex": HEADER_REGEX, + "banned_words": STD_BANNED_WORDS, + 'min_count_for_banned_words_check': 3, + 'max_count_for_banned_words_check': 6, }, + 'Задание 2': { "style": HEADER_1_STYLE, "docx_style": ["heading 2"], @@ -135,24 +178,59 @@ class StyleCheckSettings: "Характеристика выводов", "Статья", ], + "header_for_report_section_component": "", "unify_regex": None, - "regex": HEADER_REGEX - }, + "regex": HEADER_REGEX, + "banned_words": STD_BANNED_WORDS, + 'min_count_for_banned_words_check': 3, + 'max_count_for_banned_words_check': 6, + } + } + + OPNP_CONFIG = { + 'Сравнение аналогов': { - "style": HEADER_1_NUM_STYLE, - "docx_style": ["heading 2", "heading 3", "heading 4"], - "headers": [], + "style": HEADER_1_STYLE, + "docx_style": ["heading 2"], + "headers": ["Принцип отбора аналогов", + "Критерии сравнения аналогов", + "Выводы по итогам сравнения", + "Выбор метода решения", + ], "unify_regex": None, - "regex": HEADER_NUM_REGEX + "regex": HEADER_REGEX, + "banned_words": ['аттач', 'билдить', 'бинарник', 'валидный', 'дебаг', 'деплоить', 'десктопное', 'железо', + 'исходники', 'картинка', 'консольное', 'конфиг', 'кусок', 'либа', 'лог', 'мануал', 'машина', + 'отнаследованный', 'парсинг', 'пост', 'распаковать', 'сбоит', 'скачать', 'склонировать', 'скрипт', + 'тестить', 'тул', 'тула', 'тулза', 'фиксить', 'флажок', 'флаг', 'юзкейс', 'продакт', 'продакшн', + 'прод', 'фидбек', 'дедлайн', 'дэдлайн'], + 'min_ref_for_literature_references_check': 5, + 'mах_ref_for_literature_references_check': 1000, #just for future possible edit + 'min_count_for_banned_words_check': 0, + 'max_count_for_banned_words_check': 0 }, + 'any_header': { - "style": "Main_header", - "docx_style": ["heading 1"], - "headers": ["Задание"], + "style": HEADER_1_STYLE, + "docx_style": ["heading 2"], + "headers": ["Аннотация", + 
"Введение", + "Обзор предметной области", + "Выбор метода решения", + "Заключение", + "Список литературы" + ], "unify_regex": None, - "regex": HEADER_NUM_REGEX + "regex": HEADER_REGEX, + "banned_words": ['оптимально', 'оптимальный', 'надежный', 'интуитивный'], + 'min_ref_for_literature_references_check': 5, + 'mах_ref_for_literature_references_check': 1000, #just for future possible edit + 'min_count_for_banned_words_check': 0, + 'max_count_for_banned_words_check': 0 } - ] + + } + LR_MAIN_TEXT_CONFIG = [ { @@ -197,5 +275,7 @@ class StyleCheckSettings: 'LR_MAIN_TEXT': LR_MAIN_TEXT_CONFIG, 'VKR_HEADERS': VKR_CONFIG, 'VKR_MAIN_TEXT': VKR_MAIN_TEXT_CONFIG, - 'MD_HEADERS' : MD_CONFIG + 'NIR_HEADERS': NIR_CONFIG, + 'MD_HEADERS' : MD_CONFIG, + 'OPNP_HEADERS' : OPNP_CONFIG, } diff --git a/app/main/presentations/pptx/slide_pptx.py b/app/main/presentations/pptx/slide_pptx.py index 46681cdd..e71dff7f 100644 --- a/app/main/presentations/pptx/slide_pptx.py +++ b/app/main/presentations/pptx/slide_pptx.py @@ -9,6 +9,8 @@ class SlidePPTX(SlideBasic): def __init__(self, container, w, h, index=-1): SlideBasic.__init__(self, container) self.dimensions = [w, h] + self.size_of_shape = [] + self.captions = [] for p in container.placeholders: if p.is_placeholder and p.placeholder_format.type == PP_PLACEHOLDER.SLIDE_NUMBER: if p.text == '‹#›': @@ -26,8 +28,22 @@ def __init__(self, container, w, h, index=-1): self.images.append(shape) if hasattr(shape, "text"): self.text += "\n" + shape.text + if shape.text.replace(' ', '').replace('', ''): # we replace number of page because it is read as text too + self.size_of_shape.append((shape.text, shape.top, shape.left, shape.width)) if shape.has_table: self.table.append(shape) + + if self.images: + for image in self.images: + ''' + The next expression finds the most close text for image. + It is work this way, because a holder for picture and a holder for capture don't strictly correspond in size. 
+ For example, sometimes the capture holder runs over the picture holder, the text width is always different ect + ''' + sorted_size_of_shape = sorted(self.size_of_shape, + key=lambda x:(abs(x[1]-(image.top+image.height))+abs(x[2]-image.left) + abs(x[3]+x[2]-(image.left+image.width)))) + self.captions.append(sorted_size_of_shape[0]) + def __str__(self): return super().__str__() diff --git a/app/main/presentations/slide_basic.py b/app/main/presentations/slide_basic.py index 17114320..20d830b5 100644 --- a/app/main/presentations/slide_basic.py +++ b/app/main/presentations/slide_basic.py @@ -6,6 +6,7 @@ def __init__(self, container): # Extracting only the properties we need! self.dimensions = [-1, -1] self.images = [] self.table = [] + self.captions = [] def get_title(self): return self.title @@ -21,6 +22,9 @@ def get_images(self): def get_table(self): return self.table + + def get_captions(self): + return self.captions def __str__(self): return f"\tTitle: {self.title}.\n\tText: {self.text}.\n\tPage_num: {self.page_number}" diff --git a/app/main/reports/md_uploader/md_uploader.py b/app/main/reports/md_uploader/md_uploader.py index ce55fb44..ec337830 100644 --- a/app/main/reports/md_uploader/md_uploader.py +++ b/app/main/reports/md_uploader/md_uploader.py @@ -73,7 +73,7 @@ def parse(self, md_text): def make_paragraphs(self, html_text): html_text = html_text.replace("
  • ", "").replace("
  • ", "").replace("", "").replace("
      ", "") - self.paragraphs = html_text.split('\n') + self.paragraphs = [paragraph for paragraph in html_text.split('\n') if paragraph.strip()] return self.paragraphs def page_counter(self): # we need this just to find a last page and make link to the literature in banned_words_in_literature @@ -121,7 +121,6 @@ def parse_effective_styles(self): else: paragraph["runs"].append({"text": par, "style": 'body text'}) self.styled_paragraphs.append(paragraph) - return self.styled_paragraphs def make_chapters(self, work_type): diff --git a/app/main/reports/pdf_document/pdf_document_manager.py b/app/main/reports/pdf_document/pdf_document_manager.py index 9c72dc3a..4ad951b5 100644 --- a/app/main/reports/pdf_document/pdf_document_manager.py +++ b/app/main/reports/pdf_document/pdf_document_manager.py @@ -27,15 +27,19 @@ def get_text_on_page(self): # def get_text_on_page(self): # return {page + 1: self.pages[page].extract_text() for page in range(self.page_count_all)} + def get_image_num(self): + return len(self.pdf_file.get_page_images(0)) + def page_images(self, page_without_pril): total_height = 0 for page_num in range(page_without_pril): page = self.pdf_file[page_num] images = self.pdf_file.get_page_images(page) for image in images: - image_coord = page.get_image_bbox(image[7], transform=0) - total_height += (image_coord[3] - image_coord[1]) - + image_coord = page.get_image_bbox(image[7], transform=0) # might be [1.0, 1.0, -1.0, -1.0] + image_height = image_coord[3] - image_coord[1] + if image_height > 0: + total_height += image_height return total_height def page_height(self, page_without_pril): @@ -46,7 +50,12 @@ def page_height(self, page_without_pril): available_space = (height - top_margin - bottom_margin)*page_without_pril return available_space - + + def page_rows_text(self, page_num): + page = self.pdf_file.load_page(page_num) + text_blocks = page.get_text("blocks") + return text_blocks + # def get_only_text_on_page(self): # if not self.only_text_on_page: # 
only_text_on_page = {} diff --git a/app/nlp/find_tasks_on_slides.py b/app/nlp/find_tasks_on_slides.py index 02571718..ef027d48 100644 --- a/app/nlp/find_tasks_on_slides.py +++ b/app/nlp/find_tasks_on_slides.py @@ -12,7 +12,7 @@ def compare_sentences(sentence_1, sentence_2): stemming = Stemming() set_1 = stemming.get_filtered_docs(sentence_1, False) set_2 = stemming.get_filtered_docs(sentence_2, False) - rvector = set_1.union(set_2) + rvector = set_1 #.union(set_2) vector_1 = [w in set_1 for w in rvector] vector_2 = [w in set_2 for w in rvector] cosine_similarity = 1 - distance.cosine(vector_1, vector_2) diff --git a/app/nlp/similarity_of_texts.py b/app/nlp/similarity_of_texts.py index e537bc60..3dc9cf26 100644 --- a/app/nlp/similarity_of_texts.py +++ b/app/nlp/similarity_of_texts.py @@ -6,12 +6,18 @@ def check_similarity(string1, string2): stemming = Stemming() + + stemming.parse_text(string2, False) further_dev = stemming.further_dev() - base_conclusions = stemming.get_sentences(string2, False) + base_conclusions = stemming.sentences ignore = re.compile('[0-9]+[.]?|Заключение|‹#›') - clear_conclusions = [ch for ch in base_conclusions if not re.fullmatch(ignore, ch)] - recognized_conclusions = [s for s in clear_conclusions if s != further_dev.get('dev_sentence')] + conclusions = [ch for ch in base_conclusions if not re.fullmatch(ignore, ch)] + cleaned_conclusions = "\n".join(s for s in conclusions if s != further_dev.get('dev_sentence')) + + tasks = stemming.get_sentences(string1, True) + ignore = re.compile('[0-9][.]?|Задачи:|‹#›') # [:]? 
+ cleaned_tasks = "\n".join(task for task in tasks if not re.fullmatch(ignore, task)) - percentage_of_similarity = int(compare_sentences(string1, string2) * 100) + percentage_of_similarity = int(compare_sentences(cleaned_tasks, cleaned_conclusions) * 100) - return percentage_of_similarity, further_dev, recognized_conclusions + return percentage_of_similarity, further_dev, conclusions diff --git a/app/routes/admin.py b/app/routes/admin.py index 4edef375..e3e3214b 100644 --- a/app/routes/admin.py +++ b/app/routes/admin.py @@ -18,7 +18,7 @@ def my_wrapper(*args, **kwargs): @admin.route('/', methods=["GET"]) @admin_required def index(): - return "There will be a list of all admin pages here" + return render_template('admin_pages_list.html') @admin.route('/criterions', methods=["GET"]) diff --git a/app/routes/users.py b/app/routes/users.py new file mode 100644 index 00000000..d024063a --- /dev/null +++ b/app/routes/users.py @@ -0,0 +1,91 @@ +import json +from flask import abort, Blueprint, render_template, request, jsonify +from flask_login import current_user +from functools import wraps +from app.db.db_methods import get_all_users, get_user +from utils import checklist_filter, format_check_for_table +from db import db_methods + +users = Blueprint('users', __name__, template_folder='templates', static_folder='static') + + +def admin_required(route_func): + @wraps(route_func) + def my_wrapper(*args, **kwargs): + if current_user and current_user.is_admin: + return route_func(*args, **kwargs) + abort(403) + return my_wrapper + +@users.route("/data") +@admin_required +def users_data(): + filters = request.args.get("filter", "{}") + try: + filters = json.loads(filters) + filters = filters if filters else {} + except Exception as e: + # logger.warning("Can't parse filters") + # logger.warning(repr(e)) + filters = {} + filter_query = {} + if f_username := filters.get("username", None): + filter_query["username"] = {"$regex": f_username} + + if f_name := filters.get("name", 
None): + filter_query["name"] = {"$regex": f_name} + + if f_formats := filters.get("all_formats", None): + filter_query["formats"] = {"$regex": f_formats} + + if f_criteria := filters.get("all_criteria", None): + filter_query["criteria"] = {"$regex": f_criteria} + + if f_check_counts := filters.get("check_counts", None): + try: + f_check_counts_value, f_check_counts_cond = int(f_check_counts.split()[1]), f_check_counts.split()[0] + if f_check_counts_cond == '>': + filter_query["$expr"] = {"$gte": [{"$size": "$presentations"}, f_check_counts_value]} + elif f_check_counts_cond == "<": + filter_query["$expr"] = {"$lte": [{"$size": "$presentations"}, f_check_counts_value]} + except ValueError: + pass + + limit = request.args.get("limit", "") + limit = int(limit) if limit.isnumeric() else 10 + + offset = request.args.get("offset", "") + offset = int(offset) if offset.isnumeric() else 0 + + sort = request.args.get("sort", "") + sort = 'username' if not sort else sort + + order = request.args.get("order", "") + order = 'username' if not order else order + + rows, count = db_methods.get_user_cursor(filter=filter_query, limit=limit, offset=offset, sort=sort, order=order) + + response = { + "total": count, + "rows": [{ + "username": item["username"], + "name": item["name"], + "all_formats": item["formats"], + "all_criteria": item["criteria"], + "check_counts": len(item["presentations"]), + + } for item in rows] + } + return jsonify(response) + + +@users.route('/', methods=["GET"]) +@admin_required +def index(): + return render_template('user_list.html') + +@users.route('/', methods=["GET"]) +@admin_required +def user_info(username): + user_info = get_user(username) + return render_template('one_user_info.html', user_info=user_info) diff --git a/app/server.py b/app/server.py index 9cb6ea69..366978d9 100644 --- a/app/server.py +++ b/app/server.py @@ -31,6 +31,7 @@ from utils import checklist_filter, decorator_assertion, get_file_len, format_check from app.main.checks import 
CRITERIA_INFO from routes.admin import admin +from routes.users import users logger = get_root_logger('web') UPLOAD_FOLDER = '/usr/src/project/files' @@ -52,6 +53,7 @@ app.config['CELERY_BROKER_URL'] = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") app.register_blueprint(admin, url_prefix='/admin') +app.register_blueprint(users, url_prefix='/users') app.logger.addHandler(get_logging_stdout_handler()) @@ -487,8 +489,8 @@ def check_list_data(): def get_query(req): - # query for download csv/zip - filter_query = checklist_filter(req.args) + # query for download csv/zip (only for admins) + filter_query = checklist_filter(req.args, is_admin=True) limit = False offset = False sort = req.args.get("sort", "") @@ -505,11 +507,20 @@ def get_stats(): return [format_check_for_table(item, set_link=URL_DOMEN) for item in rows] +def check_access_token(access_token): + # if request has access_token, and it's equal to ACCESS_TOKEN from env -> accept, else - check user + return access_token and (access_token == os.environ.get('ACCESS_TOKEN')) + + +def check_export_access(): + return check_access_token(request.args.get('access_token', None)) \ + or (current_user.is_authenticated and current_user.is_admin) + + @app.route("/get_csv") -@login_required def get_csv(): from io import StringIO - if not current_user.is_admin: + if not check_export_access(): abort(403) response = get_stats() df = pd.read_json(StringIO(json.dumps(response))) @@ -521,9 +532,8 @@ def get_csv(): @app.route("/get_zip") -@login_required def get_zip(): - if not current_user.is_admin: + if not check_export_access(): abort(403) original_names = request.args.get('original_names', False) == 'true' diff --git a/app/templates/admin_pages_list.html b/app/templates/admin_pages_list.html new file mode 100644 index 00000000..69869528 --- /dev/null +++ b/app/templates/admin_pages_list.html @@ -0,0 +1,16 @@ +{% extends "root.html" %} + +{% block title %}Список страниц для администраторов{% endblock %} + +{% block 
main %} + +
      {% include "header.html" %}
      + + +{% endblock main %} diff --git a/app/templates/header.html b/app/templates/header.html index 5abe3a65..0cab17c0 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -17,6 +17,7 @@ Наборы критериев Логи + Список админ-страниц Celery Version {% endif %} diff --git a/app/templates/one_user_info.html b/app/templates/one_user_info.html new file mode 100644 index 00000000..fae90366 --- /dev/null +++ b/app/templates/one_user_info.html @@ -0,0 +1,65 @@ +{# Accepts: header dependencies, results, id, filename #} + + +{% extends "root.html" %} + +{% block title %}Информация о пользователях{% endblock %} + +{% block main %} + + +
      {% include "header.html" %}
      +
      +
      +

      + Страница пользователя: {{ user_info.username }} +

      + Список всех загрузок пользователя + + + + + + + + + + +
      UsernameNameFormatsCriteriaCount of checks
      +
      +
      + +{% endblock main %} diff --git a/app/templates/user_list.html b/app/templates/user_list.html new file mode 100644 index 00000000..d6c0baf1 --- /dev/null +++ b/app/templates/user_list.html @@ -0,0 +1,65 @@ +{# Accepts: header dependencies, results, id, filename #} + + +{% extends "root.html" %} + +{% block title %}Информация о пользователях{% endblock %} + +{% block main %} + + +
      {% include "header.html" %}
      +
      +
      +

      + Список пользователей: +

      + + + + + + + + + + +
      UsernameNameFormatsCriteriaCount of checks
      +
      +
      + +{% endblock main %} + + diff --git a/app/utils/__init__.py b/app/utils/__init__.py index 9b948c7f..6fe84709 100644 --- a/app/utils/__init__.py +++ b/app/utils/__init__.py @@ -5,7 +5,7 @@ from .get_file_len import get_file_len from .get_text_from_slides import get_text_from_slides from .parse_for_html import format_descriptions, format_header, find_tasks_on_slides_feedback, \ - tasks_conclusions_feedback + tasks_conclusions_feedback, name_of_image_check_results from .repeated_timer import RepeatedTimer from .timezone import timezone_offset from .check_file import check_file diff --git a/app/utils/checklist_filter.py b/app/utils/checklist_filter.py index 795c9d2a..9e809dde 100644 --- a/app/utils/checklist_filter.py +++ b/app/utils/checklist_filter.py @@ -8,7 +8,7 @@ logger = logging.getLogger('root_logger') FILTER_PREFIX = 'filter_' -def checklist_filter(data): +def checklist_filter(data, is_admin=False): from utils import timezone_offset filters = {key[len(FILTER_PREFIX):]: data[key] for key in data if key.startswith(FILTER_PREFIX)} @@ -80,7 +80,7 @@ def checklist_filter(data): logger.warning(repr(e)) # set user filter for current non-admin user - if not current_user.is_admin: + if not (is_admin or current_user.is_admin): filter_query["user"] = current_user.username return filter_query diff --git a/app/utils/parse_for_html.py b/app/utils/parse_for_html.py index b03376aa..d118810d 100644 --- a/app/utils/parse_for_html.py +++ b/app/utils/parse_for_html.py @@ -25,3 +25,9 @@ def tasks_conclusions_feedback(results): return format_header('Соответствует на {}%'.format(results[0]), tag.br), \ 'Распознанные заключения: ', \ *format_descriptions(results[2], tag.div_class, tag.br + tag.close_div) + +def name_of_image_check_results(result_str, all_captions): + return format_header(result_str) + \ + f'Список всех обнаруженных подписей:' + \ + ''.join(format_descriptions(all_captions, tag.div_class, tag.br + tag.close_div)) + diff --git a/assets/scripts/check_list.js 
b/assets/scripts/check_list.js index ade99bf7..1480cabf 100644 --- a/assets/scripts/check_list.js +++ b/assets/scripts/check_list.js @@ -253,4 +253,4 @@ function downdloadBlob(blob, filename) { document.body.appendChild(a); a.click(); a.remove(); -} \ No newline at end of file +} diff --git a/assets/scripts/main.js b/assets/scripts/main.js index 13ae8b9c..18bd5f35 100644 --- a/assets/scripts/main.js +++ b/assets/scripts/main.js @@ -28,6 +28,8 @@ import './version'; import './check_list'; import './logs'; import './admin_criterions'; +import './user_list'; +import './one_user_info' import '../favicon.ico'; import '../styles/404.css'; diff --git a/assets/scripts/one_user_info.js b/assets/scripts/one_user_info.js new file mode 100644 index 00000000..c8c46e39 --- /dev/null +++ b/assets/scripts/one_user_info.js @@ -0,0 +1,167 @@ +import { debounce, isFloat, resetTable, ajaxRequest, onPopState } from "./utils" + +let $table; +const AJAX_URL = "/users/data" +let debounceInterval = 500; + + +String.prototype.insert = function (index, string) { + if (index > 0) { + return this.substring(0, index) + string + this.substr(index) + } + return string + this +} + + +$(() => { + initTable() + window.onpopstate = onPopState + + const $dataFilter = $(".bootstrap-table-filter-control-result") + $dataFilter.on("keypress", (e) => { + const val = $dataFilter.val() + const numbers = val.split("-") + + if (e.key === ".") { + const carret = $dataFilter[0].selectionStart + let expectedStr + if (carret <= numbers[0].length) { + expectedStr = numbers[0].insert(carret, ".") + } else { + expectedStr = numbers[1].insert(carret - numbers[0].length - 1, ".") + } + + if (isFloat(expectedStr)) { + return + } + } + + if (e.key >= "0" && e.key <= "9") { + return + } + + if (e.key === "-") { + if (numbers.length === 1) { + return + } + } + + e.preventDefault() + }) +}) + + +function initTable() { + $table = $("#one-user-table"); + + // get query string + const queryString = window.location.search; + + 
// parse query search to js object + const params = Object.fromEntries(new URLSearchParams(queryString).entries()) + + // check correct order query + if (params.order !== "asc" && params.order !== "desc" && params.order !== "") { + params.order = "" + } + + // check correct sort query + if (params.sort !== "") { + let match = false + $table.find("th[data-sortable='true']").each(function () { + if ($(this).data("field") === params.sort) { + match = true + return false + } + }) + + if (match === false) { + params.sort = "" + } + } + + + // check pair of sort and order + if ([params.sort, params.order].includes("")) { + params.sort = "" + params.order = "" + } + + // Fill filters + $table.on("created-controls.bs.table", function () { + if (params.filter) { + params.filter = JSON.parse(decodeURI(params.filter)) + for (const [key, value] of Object.entries(params.filter)) { + const $input = $(`.bootstrap-table-filter-control-${key}`) + $input.val(value) + } + } + }) + + // activate bs table + $table.bootstrapTable({ + pageNumber: parseInt(params.page) || 1, + pageSize: parseInt(params.size) || 10, + sortName: params.sort, + sortOrder: params.order, + buttons: buttons, + detailView: true, + detailViewIcon: false, + detailViewByClick: true, + detailFormatter: detailFormatter, + + queryParams: queryParams, + ajax: debouncedAjaxRequest, + }) +} + +// debounced ajax calls. 
+const debouncedAjaxRequest = debounce(function(params) {ajaxRequest(AJAX_URL, params)}, debounceInterval); + + +function queryParams(params) { + let filters = {} + $('.filter-control').each(function () { + const name = $(this).parents("th").data("field") + const val = this.querySelector("input").value + if (val) { + filters[name] = val + } + }) + + const query = { + limit: params.limit, + offset: params.offset, + sort: params.sort, + order: params.order, + } + + if (!$.isEmptyObject(filters)) { + query.filter = JSON.stringify(filters) + } + + return query +} + + +function buttons() { + let buttonsObj = {} + + buttonsObj["ResetTable"] = { + text: 'Reset', + event: function() { resetTable($table, queryParams) } + } + + return buttonsObj +} + + +function detailFormatter(index, row) { + var html = [] + $.each(row, function (key, value) { + if (key === 'message' || key === 'pathname') { + html.push('

      ' + key + ': ' + row[key] + '

      ') + } + }) + return html.join('') +} diff --git a/assets/scripts/user_list.js b/assets/scripts/user_list.js new file mode 100644 index 00000000..3826cddd --- /dev/null +++ b/assets/scripts/user_list.js @@ -0,0 +1,176 @@ +import { debounce, isFloat, resetTable, ajaxRequest, onPopState } from "./utils" + +let $table; +const AJAX_URL = "/users/data" +let debounceInterval = 500; + + +String.prototype.insert = function (index, string) { + if (index > 0) { + return this.substring(0, index) + string + this.substr(index) + } + return string + this +} + + +$(() => { + initTable() + window.onpopstate = onPopState + + const $dataFilter = $(".bootstrap-table-filter-control-result") + $dataFilter.on("keypress", (e) => { + const val = $dataFilter.val() + const numbers = val.split("-") + + if (e.key === ".") { + const carret = $dataFilter[0].selectionStart + let expectedStr + if (carret <= numbers[0].length) { + expectedStr = numbers[0].insert(carret, ".") + } else { + expectedStr = numbers[1].insert(carret - numbers[0].length - 1, ".") + } + + if (isFloat(expectedStr)) { + return + } + } + + if (e.key >= "0" && e.key <= "9") { + return + } + + if (e.key === "-") { + if (numbers.length === 1) { + return + } + } + + e.preventDefault() + }) +}) + + +function initTable() { + $table = $("#user-list-table"); + + // get query string + const queryString = window.location.search; + + // parse query search to js object + const params = Object.fromEntries(new URLSearchParams(queryString).entries()) + + // check correct order query + if (params.order !== "asc" && params.order !== "desc" && params.order !== "") { + params.order = "" + } + + // check correct sort query + if (params.sort !== "") { + let match = false + $table.find("th[data-sortable='true']").each(function () { + if ($(this).data("field") === params.sort) { + match = true + return false + } + }) + + if (match === false) { + params.sort = "" + } + } + + + // check pair of sort and order + if ([params.sort, 
params.order].includes("")) { + params.sort = "" + params.order = "" + } + + // Fill filters + $table.on("created-controls.bs.table", function () { + if (params.filter) { + params.filter = JSON.parse(decodeURI(params.filter)) + for (const [key, value] of Object.entries(params.filter)) { + const $input = $(`.bootstrap-table-filter-control-${key}`) + $input.val(value) + } + } + }) + + // activate bs table + $table.bootstrapTable({ + pageNumber: parseInt(params.page) || 1, + pageSize: parseInt(params.size) || 10, + sortName: params.sort, + sortOrder: params.order, + buttons: buttons, + detailView: true, + detailViewIcon: false, + detailViewByClick: true, + detailFormatter: detailFormatter, + + queryParams: queryParams, + ajax: debouncedAjaxRequest, + + columns: [{ + field: "username", + formatter: usernameFormatter + }] + }) +} + +function usernameFormatter(value, row, index, field) { + return `${value}` +} + +// debounced ajax calls. +const debouncedAjaxRequest = debounce(function(params) {ajaxRequest(AJAX_URL, params)}, debounceInterval); + + +function queryParams(params) { + let filters = {} + $('.filter-control').each(function () { + const name = $(this).parents("th").data("field") + const val = this.querySelector("input").value + if (val) { + filters[name] = val + } + }) + + const query = { + limit: params.limit, + offset: params.offset, + sort: params.sort, + order: params.order, + } + + if (!$.isEmptyObject(filters)) { + query.filter = JSON.stringify(filters) + } + + return query +} + + +function buttons() { + let buttonsObj = {} + + buttonsObj["ResetTable"] = { + text: 'Reset', + event: function() { resetTable($table, queryParams) } + } + + return buttonsObj +} + + +function detailFormatter(index, row) { + var html = [] + $.each(row, function (key, value) { + if (key === 'message' || key === 'pathname') { + html.push('

      ' + key + ': ' + row[key] + '

      ') + } + }) + return html.join('') +} diff --git a/tests/README.md b/tests/README.md index 9f341b08..eb9c4586 100644 --- a/tests/README.md +++ b/tests/README.md @@ -70,4 +70,7 @@ class FileLoadTestSelenium(BasicSeleniumTest) with 3 tests Test check: if reports wit different extensions loads correctly use default documents from "/tests" or your own example +### Test for open criterion_packs_page: +class CriterionPacksTestSelenium +group of checks with '/criterion_packs' diff --git a/tests/main.py b/tests/main.py index e9de1e39..d8001d53 100644 --- a/tests/main.py +++ b/tests/main.py @@ -8,6 +8,8 @@ from test_single_card_check import SingleCheckTestSelenium from test_version import VersionTestSelenium from test_file_load import FileLoadTestSelenium +from test_criterion_packs_page import CriterionPacksTestSelenium +from test_admin_criterions_page import AdminCriterionsTestSelenium def parse_arguments(): parser = argparse.ArgumentParser(description='Run Selenium tests with specified data') @@ -26,7 +28,14 @@ def main(): args = parse_arguments() suite = unittest.TestSuite() - tests = (AuthTestSelenium, StatisticTestSelenium, FileLoadTestSelenium, SingleCheckTestSelenium, VersionTestSelenium) + tests = (AuthTestSelenium, + StatisticTestSelenium, + FileLoadTestSelenium, + SingleCheckTestSelenium, + VersionTestSelenium, + CriterionPacksTestSelenium, + AdminCriterionsTestSelenium) + param = (args.host, args.login, args.password, args.report, args.report_doc, args.pres) for test in tests: suite.addTest(BasicSeleniumTest.parametrize(test, param=param)) diff --git a/tests/scripts/docker_check_tests.sh b/tests/scripts/docker_check_tests.sh index 67deee05..b09d55f9 100755 --- a/tests/scripts/docker_check_tests.sh +++ b/tests/scripts/docker_check_tests.sh @@ -1,7 +1,7 @@ # !/bin/bash service="selenium-tests" -container_id=$(docker-compose -f docker-compose.yml -f docker-compose-selenium.yml ps -q $service) +container_id=$(docker compose -f docker-compose.yml -f 
docker-compose-selenium.yml ps -q $service) if [ -z "$container_id" ]; then echo "Контейнер сервиса $service не найден." @@ -17,11 +17,11 @@ echo "tests are finished" EXIT_CODE=$(docker inspect "$container_id" --format='{{.State.ExitCode}}') echo "tests logs:" -docker-compose -f docker-compose.yml -f docker-compose-selenium.yml logs selenium-tests +docker compose -f docker-compose.yml -f docker-compose-selenium.yml logs selenium-tests echo "web logs:" -docker-compose logs web +docker compose logs web echo "worker logs:" -docker-compose logs worker +docker compose logs worker if [ "$EXIT_CODE" -eq 0 ]; then echo "tests finished with code $EXIT_CODE (OK)" diff --git a/tests/test_admin_criterions_page.py b/tests/test_admin_criterions_page.py new file mode 100644 index 00000000..7048a0ae --- /dev/null +++ b/tests/test_admin_criterions_page.py @@ -0,0 +1,37 @@ +from basic_selenium_test import BasicSeleniumTest +from selenium.webdriver.common.by import By + +class AdminCriterionsTestSelenium(BasicSeleniumTest): + + def begin(self): + self.authorization() + URL = self.get_url('/admin') + self.get_driver().get(URL) + self.get_driver().implicitly_wait(30) + page_text = self.driver.find_element(By.TAG_NAME, "body").text + if 'администратор' not in page_text.lower(): + self.skipTest("This test runs inly with admin's login and password") + else: + URL = self.get_url('/admin/criterions') + self.get_driver().get(URL) + self.get_driver().implicitly_wait(30) + + + def test_open_criterions_list(self): + self.begin() + table = self.driver.find_element(By.ID, 'results_table') + self.assertNotEqual(table, None) + rows = table.find_elements(By.TAG_NAME, 'tr') + self.assertNotEqual(rows, None) + headers = self.driver.find_elements(By.XPATH, "//table//th") + header_id = any("id" in header.text.lower() for header in headers) + header_label = any('label' in header.text.lower() for header in headers) + self.assertTrue(header_id, 'Id header is not found') + self.assertTrue(header_label, 
'label header is not found') + + def test_open_description(self): + self.begin() + button = self.driver.find_element(By.XPATH, '//table[contains(@class, "table")]//tbody/tr[1]/td[1]/i') + button.click() + description = self.driver.find_element(By.XPATH, '//table[contains(@class, "table")]//tbody/tr[2]/td').text.strip() + self.assertNotEqual(description, None) diff --git a/tests/test_criterion_packs_page.py b/tests/test_criterion_packs_page.py new file mode 100644 index 00000000..b94b56ef --- /dev/null +++ b/tests/test_criterion_packs_page.py @@ -0,0 +1,85 @@ +from basic_selenium_test import BasicSeleniumTest +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import Select +from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +# from selenium.common.exceptions import NoSuchElementException + +class CriterionPacksTestSelenium(BasicSeleniumTest): + + def begin(self): + self.authorization() + URL = self.get_url('/criterion_packs') + self.get_driver().get(URL) + self.get_driver().implicitly_wait(30) + + def pack_changing(self): + form = self.get_driver().find_element(By.ID, 'raw_criterions') + text_form = form.get_attribute('value') + form.clear() + form.send_keys(text_form) + save_button = self.get_driver().find_element(By.ID, 'pack_submit_button') + save_button.click() + WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.ID, "success-text"))) + success_text = self.get_driver().find_element(By.ID, "success-text") + self.assertNotEqual(success_text, None) + + def pack_wrong_changing(self): + form = self.get_driver().find_element(By.ID, 'raw_criterions') + form.send_keys('some_wrong_text') + save_button = self.get_driver().find_element(By.ID, 'pack_submit_button') + save_button.click() + WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.ID, "error-text"))) + error_text = self.get_driver().find_element(By.ID, "error-text") + 
self.assertNotEqual(error_text, None) + + def test_open_criterions_pack_list(self): + self.begin() + string_in_table = self.driver.find_element(By.XPATH, "//table[contains(@class, 'table')]//tbody/tr[1]/td[4]/a") + self.assertNotEqual(string_in_table, None) + headers = self.driver.find_elements(By.XPATH, "//table//th") + header_name = any("название" in header.text.lower() for header in headers) + header_type = any('тип' in header.text.lower() for header in headers) + header_edit = any('редактировать' in header.text.lower() for header in headers) + self.assertTrue(header_name, 'header "название" is not found') + self.assertTrue(header_type, 'header "тип" is not found') + self.assertTrue(header_edit, 'header "редактировать" is not found') + + # except NoSuchElementException: + # empty_table = self.driver.find_element(By.CLASS_NAME, "no-records-found") + # self.assertNotEqual(empty_table, None) + + def test_open_new_pack(self): + self.begin() + part_of_link_text = "создать" + xpath_expression = f"//a[contains(translate(text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ', 'abcdefghijklmnopqrstuvwxyzабвгдеёжзийклмнопрстуфхцчшщъыьэюя'), '{part_of_link_text.lower()}')]" + link_element = self.driver.find_element(By.XPATH, xpath_expression) + link_element.click() + expected_url = self.get_url('/criterion_pack') + self.assertEqual(self.driver.current_url, expected_url) + + + def test_for_one_pack(self): + self.begin() + string_in_table = self.driver.find_element(By.XPATH, "//table[contains(@class, 'table')]//tr[1]/td[4]/a") + pack_name = string_in_table.get_attribute("href").split("/")[-1] + pack_type = self.driver.find_element(By.XPATH, "//table[contains(@class, 'table')]//tr[1]/td[2]").text.replace('.', ' ').split(' ')[0] + URL = self.get_url(f'/criterion_pack/{pack_name}') + self.get_driver().get(URL) + opened_pack_name = self.get_driver().find_element(By.ID, 'pack_name').get_attribute('value') + opened_pack_type = self.get_driver().find_element(By.ID, 
'file_type') + select = Select(opened_pack_type) + selected_type_text = select.first_selected_option.text.strip() + self.assertEqual(pack_name, opened_pack_name) + self.assertEqual(pack_type, selected_type_text) + self.pack_changing() + self.pack_wrong_changing() + + + def test_pack_description(self): + self.authorization() + description = self.driver.find_element(By.ID, 'btn_table_info') + WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.ID, "table_info"))) + table_info = self.get_driver().find_element(By.ID, "table_info") + rows = table_info.find_elements(By.TAG_NAME, 'li') + self.assertNotEqual(rows, None)