From d908165b2a17acd006a224a101480634d05b8997 Mon Sep 17 00:00:00 2001 From: Mike Hendricks Date: Thu, 14 Mar 2024 00:46:08 -0700 Subject: [PATCH] Add TextSearch for easily searching all workboxes for text --- .pre-commit-config.yaml | 1 + preditor/gui/find_files.py | 119 ++++++ preditor/gui/loggerwindow.py | 28 ++ preditor/gui/ui/find_files.ui | 140 +++++++ preditor/gui/ui/loggerwindow.ui | 42 ++- preditor/resource/img/README.md | 10 + preditor/resource/img/format-letter-case.svg | 1 + preditor/resource/img/regex.svg | 1 + preditor/utils/text_search.py | 342 ++++++++++++++++++ tests/find_files/re_greedy_False_0_True.md | 16 + tests/find_files/re_greedy_False_2_True.md | 25 ++ tests/find_files/re_greedy_True_2_True.md | 25 ++ .../find_files/re_greedy_upper_True_2_True.md | 13 + tests/find_files/re_simple_False_0_True.md | 16 + tests/find_files/re_simple_False_2_True.md | 25 ++ tests/find_files/re_simple_False_3_True.md | 25 ++ tests/find_files/re_simple_True_2_True.md | 25 ++ tests/find_files/simple_False_0_False.md | 16 + tests/find_files/simple_False_1_False.md | 22 ++ tests/find_files/simple_False_2_False.md | 25 ++ tests/find_files/simple_False_3_False.md | 25 ++ tests/find_files/simple_True_2_False.md | 25 ++ tests/find_files/tab_text.txt | 20 + tests/find_files/test_find_files.py | 74 ++++ 24 files changed, 1058 insertions(+), 3 deletions(-) create mode 100644 preditor/gui/find_files.py create mode 100644 preditor/gui/ui/find_files.ui create mode 100644 preditor/resource/img/format-letter-case.svg create mode 100644 preditor/resource/img/regex.svg create mode 100644 preditor/utils/text_search.py create mode 100644 tests/find_files/re_greedy_False_0_True.md create mode 100644 tests/find_files/re_greedy_False_2_True.md create mode 100644 tests/find_files/re_greedy_True_2_True.md create mode 100644 tests/find_files/re_greedy_upper_True_2_True.md create mode 100644 tests/find_files/re_simple_False_0_True.md create mode 100644 
tests/find_files/re_simple_False_2_True.md create mode 100644 tests/find_files/re_simple_False_3_True.md create mode 100644 tests/find_files/re_simple_True_2_True.md create mode 100644 tests/find_files/simple_False_0_False.md create mode 100644 tests/find_files/simple_False_1_False.md create mode 100644 tests/find_files/simple_False_2_False.md create mode 100644 tests/find_files/simple_False_3_False.md create mode 100644 tests/find_files/simple_True_2_False.md create mode 100644 tests/find_files/tab_text.txt create mode 100644 tests/find_files/test_find_files.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 172c1b11..8818c33c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,6 +31,7 @@ repos: - id: end-of-file-fixer - id: requirements-txt-fixer - id: trailing-whitespace + exclude: ^(tests/find_files/) - repo: https://github.com/pycqa/isort rev: 5.12.0 diff --git a/preditor/gui/find_files.py b/preditor/gui/find_files.py new file mode 100644 index 00000000..18b2b575 --- /dev/null +++ b/preditor/gui/find_files.py @@ -0,0 +1,119 @@ +from __future__ import absolute_import, print_function + +from Qt.QtCore import Qt +from Qt.QtGui import QIcon +from Qt.QtWidgets import QApplication, QShortcut, QWidget + +from .. import resourcePath +from ..utils.text_search import RegexTextSearch, SimpleTextSearch +from . 
import loadUi + + +class FindFiles(QWidget): + def __init__(self, parent=None, managers=None, console=None): + super(FindFiles, self).__init__(parent=parent) + if managers is None: + managers = [] + self.managers = managers + self.console = console + self.finder = None + self.match_files_count = 0 + + loadUi(__file__, self) + + # Set the icons + self.uiCaseSensitiveBTN.setIcon( + QIcon(resourcePath("img/format-letter-case.svg")) + ) + self.uiCloseBTN.setIcon(QIcon(resourcePath('img/close-thick.png'))) + self.uiRegexBTN.setIcon(QIcon(resourcePath("img/regex.svg"))) + + # Create shortcuts + self.uiCloseSCT = QShortcut( + Qt.Key_Escape, self, context=Qt.WidgetWithChildrenShortcut + ) + self.uiCloseSCT.activated.connect(self.hide) + + self.uiCaseSensitiveSCT = QShortcut( + Qt.AltModifier | Qt.Key_C, self, context=Qt.WidgetWithChildrenShortcut + ) + self.uiCaseSensitiveSCT.activated.connect(self.uiCaseSensitiveBTN.toggle) + + self.uiRegexSCT = QShortcut( + Qt.AltModifier | Qt.Key_R, self, context=Qt.WidgetWithChildrenShortcut + ) + self.uiRegexSCT.activated.connect(self.uiRegexBTN.toggle) + + def activate(self): + """Called to make this widget ready for the user to interact with.""" + self.show() + self.uiFindTXT.setFocus() + + def find(self): + find_text = self.uiFindTXT.text() + context = self.uiContextSPN.value() + # Create an instance of the TextSearch to use for this search + if self.uiRegexBTN.isChecked(): + TextSearch = RegexTextSearch + else: + TextSearch = SimpleTextSearch + self.finder = TextSearch( + find_text, self.uiCaseSensitiveBTN.isChecked(), context=context + ) + self.finder.callback_matching = self.insert_found_text + self.finder.callback_non_matching = self.insert_text + + self.insert_text(self.finder.title()) + + self.match_files_count = 0 + for manager in self.managers: + for ( + editor, + group_name, + tab_name, + group_index, + tab_index, + ) in manager.all_widgets(): + path = "/".join((group_name, tab_name)) + workbox_id = 
'{},{}'.format(group_index, tab_index) + self.find_in_editor(editor, path, workbox_id) + + self.insert_text( + '\n{} matches in {} workboxes\n'.format( + self.finder.match_count, self.match_files_count + ) + ) + + def find_in_editor(self, editor, path, workbox_id): + # Ensure the editor text is loaded and get its raw text + editor.__show__() + text = editor.__text__() + + # Use the finder to check for matches + found = self.finder.search_text(text, path, workbox_id) + if found: + self.match_files_count += 1 + + def insert_found_text(self, text, workbox_id, line_num, tool_tip): + href = ', {}, {}'.format(workbox_id, line_num) + cursor = self.console.textCursor() + # Insert hyperlink + fmt = cursor.charFormat() + fmt.setAnchor(True) + fmt.setAnchorHref(href) + fmt.setFontUnderline(True) + fmt.setToolTip(tool_tip) + cursor.insertText(text, fmt) + # Show the updated text output + QApplication.instance().processEvents() + + def insert_text(self, text): + cursor = self.console.textCursor() + fmt = cursor.charFormat() + fmt.setAnchor(False) + fmt.setAnchorHref('') + fmt.setFontUnderline(False) + fmt.setToolTip('') + cursor.insertText(text, fmt) + # Show the updated text output + QApplication.instance().processEvents() diff --git a/preditor/gui/loggerwindow.py b/preditor/gui/loggerwindow.py index 3c29d127..9c33d967 100644 --- a/preditor/gui/loggerwindow.py +++ b/preditor/gui/loggerwindow.py @@ -105,6 +105,11 @@ def __init__(self, parent, name=None, run_workbox=False, standalone=False): ) self.uiConsoleTOOLBAR.insertSeparator(self.uiRunSelectedACT) + # Configure Find in Workboxes + self.uiFindInWorkboxesWGT.hide() + self.uiFindInWorkboxesWGT.managers.append(self.uiWorkboxTAB) + self.uiFindInWorkboxesWGT.console = self.console() + # Initial configuration of the logToFile feature self._logToFilePath = None self._stds = None @@ -680,6 +685,12 @@ def recordPrefs(self, manual=False): 'textEditorCmdTempl': self.textEditorCmdTempl, 'currentStyleSheet': self._stylesheet, 
'flash_time': self.uiConsoleTXT.flash_time, + 'find_files_regex': self.uiFindInWorkboxesWGT.uiRegexBTN.isChecked(), + 'find_files_cs': ( + self.uiFindInWorkboxesWGT.uiCaseSensitiveBTN.isChecked() + ), + 'find_files_context': self.uiFindInWorkboxesWGT.uiContextSPN.value(), + 'find_files_text': self.uiFindInWorkboxesWGT.uiFindTXT.text(), } ) @@ -786,6 +797,18 @@ def restorePrefs(self): ) self.uiErrorHyperlinksACT.setChecked(pref.get('uiErrorHyperlinksACT', True)) + # Find Files settings + self.uiFindInWorkboxesWGT.uiRegexBTN.setChecked( + pref.get('find_files_regex', False) + ) + self.uiFindInWorkboxesWGT.uiCaseSensitiveBTN.setChecked( + pref.get('find_files_cs', False) + ) + self.uiFindInWorkboxesWGT.uiContextSPN.setValue( + pref.get('find_files_context', 3) + ) + self.uiFindInWorkboxesWGT.uiFindTXT.setText(pref.get('find_files_text', '')) + # External text editor filepath and command template defaultExePath = r"C:\Program Files\Sublime Text 3\sublime_text.exe" defaultCmd = r"{exePath} {modulePath}:{lineNum}" @@ -1023,6 +1046,11 @@ def showEvent(self, event): def show_workbox_options(self): self.uiWorkboxSTACK.setCurrentIndex(WorkboxPages.Options) + @Slot() + def show_find_in_workboxes(self): + """Ensure the find workboxes widget is visible and has focus.""" + self.uiFindInWorkboxesWGT.activate() + @Slot() def show_focus_name(self): model = GroupTabListItemModel(manager=self.uiWorkboxTAB) diff --git a/preditor/gui/ui/find_files.ui b/preditor/gui/ui/find_files.ui new file mode 100644 index 00000000..4068be89 --- /dev/null +++ b/preditor/gui/ui/find_files.ui @@ -0,0 +1,140 @@ + + + uiFindFilesWGT + + + + 0 + 0 + 636 + 41 + + + + Form + + + + + + Find: + + + + + + + + + Regex (Alt + R) + + + Regex + + + true + + + + + + + Case Sensitive (Alt + C) + + + Case Sensitive + + + true + + + + + + + # of lines of context to show + + + QAbstractSpinBox::PlusMinus + + + 2 + + + + + + + + + + + + Find + + + + + + + x + + + + + + + + + uiFindBTN + released() + uiFindFilesWGT + 
find() + + + 601 + 31 + + + 421 + 29 + + + + + uiFindTXT + returnPressed() + uiFindFilesWGT + find() + + + 488 + 23 + + + 501 + 65 + + + + + uiCloseBTN + released() + uiFindFilesWGT + hide() + + + 620 + 19 + + + 676 + 24 + + + + + + find() + + diff --git a/preditor/gui/ui/loggerwindow.ui b/preditor/gui/ui/loggerwindow.ui index 4fc142c3..1b26ab25 100644 --- a/preditor/gui/ui/loggerwindow.ui +++ b/preditor/gui/ui/loggerwindow.ui @@ -81,6 +81,9 @@ + + + @@ -131,7 +134,7 @@ - &Run + Run @@ -322,6 +325,8 @@ + + @@ -951,6 +956,14 @@ at the indicated line in the specified text editor. Ctrl+Alt+Shift+R + + + Find in Workboxes + + + Ctrl+Shift+F + + @@ -969,6 +982,12 @@ at the indicated line in the specified text editor. QWidget
preditor.gui.editor_chooser.h
+ + FindFiles + QWidget +
preditor.gui.find_files.h
+ 1 +
@@ -1011,8 +1030,8 @@ at the indicated line in the specified text editor. update_workbox_stack() - 754 - 377 + 763 + 371 747 @@ -1020,11 +1039,28 @@ at the indicated line in the specified text editor. + + uiFindInWorkboxesACT + triggered() + PrEditorWindow + show_find_in_workboxes() + + + -1 + -1 + + + 397 + 202 + + + apply_options() reset_options() show_workbox_options() update_workbox_stack() + show_find_in_workboxes() diff --git a/preditor/resource/img/README.md b/preditor/resource/img/README.md index 6649bb0b..d3c78d01 100644 --- a/preditor/resource/img/README.md +++ b/preditor/resource/img/README.md @@ -5,3 +5,13 @@ Converted to multi-resolution icon using: https://convertico.com/svg-to-ico/ Most other icons downloaded from https://materialdesignicons.com/. + +Svg icons are preferred as they are plain text files that play nicely with git. +Please make sure to update the sources table when adding or updating images. + +# Sources for resources + +| File | Source | Notes | Author | +|---|---|---|---| +| ![](preditor/resource/img/format-letter-case.svg) [format-letter-case.svg](preditor/resource/img/format-letter-case.svg) | https://pictogrammers.com/library/mdi/icon/format-letter-case/ | | [Austin Andrews](https://pictogrammers.com/contributor/Templarian/) | +| ![](preditor/resource/img/regex.svg) [regex.svg](preditor/resource/img/regex.svg) | https://pictogrammers.com/library/mdi/icon/regex/ | | [Doug C. 
Hardester](https://pictogrammers.com/contributor/r3volution11/) | diff --git a/preditor/resource/img/format-letter-case.svg b/preditor/resource/img/format-letter-case.svg new file mode 100644 index 00000000..eaaf7543 --- /dev/null +++ b/preditor/resource/img/format-letter-case.svg @@ -0,0 +1 @@ + diff --git a/preditor/resource/img/regex.svg b/preditor/resource/img/regex.svg new file mode 100644 index 00000000..feea451d --- /dev/null +++ b/preditor/resource/img/regex.svg @@ -0,0 +1 @@ + diff --git a/preditor/utils/text_search.py b/preditor/utils/text_search.py new file mode 100644 index 00000000..bf64b63d --- /dev/null +++ b/preditor/utils/text_search.py @@ -0,0 +1,342 @@ +from __future__ import absolute_import, print_function + +import abc +import re +from collections import deque + +from future.utils import with_metaclass + + +class TextSearch(with_metaclass(abc.ABCMeta, object)): + """Base class used to search and markup text for matches to a search term. + + Parameters: + callback_matching (callable): Called when matching text should be written. + See `print_matching` to see what inputs it must accept. + callback_non_matching (callable): Called when plain text should be written. + See `print_non_matching` to see what inputs it must accept. + gap_format (str): A format string used to indicate when there is a gap in + the results shown. These variables are provided when formatting. `dot` + `dot` is a `.` for each digit required to show the current line number. + `padding` can be used to properly pad `dot`. + margin_format (str): A format string used to generate the line number + text at the start of a text line. These variables are provided when + formatting. `line_num` the current line number as an int. `padding` + can be used to properly pad `line_num`. `match_indicator` is a string + that is a `:` if the line contains a match, otherwise an empty space. + match_count (int): The number times matching text was found including + multiple finds on the same line. 
This value is not reset internally + and can be used to track all matches across multiple calls of `search_text`. + padding (int): Set by `search_text` to the number of digits required to + show all line numbers in the document. Used to ensure consistent number + padding for all line numbers printed in the margin and gaps. + + Args: + find_text (str): The text this finder will search for. + case_sensitive (bool): If True the search is case sensitive. + context (int): The number of lines to show before and after a line with + a match. + """ + + def __init__(self, find_text, case_sensitive=False, context=3): + self._padding = 0 + self.case_sensitive = case_sensitive + self.context = context + self.find_text = find_text + self.gap_format = " {dot: >{padding}} \n" + self.margin_format = " {line_num: >{padding}}{match_indicator} " + self.match_count = 0 + + self.callback_matching = self.print_matching + self.callback_non_matching = self.print_non_matching + + def clear_cache(self): + """The finder can implement this to clear any cached data. + + This is called when no matches have been found beyond the # of context lines + """ + + @abc.abstractmethod + def indicate_line(self, line): + """Yields chunks of line and if each chunk should be indicated. + + The first yield should always be `(None, bool)`. The None value indicates + that the margin should be printed. This triggers printing of `self.margin` + passing the bool to the match_found argument. + + Yields: + text (str or None): The text to be printed. + indicate (bool): Should text be treated as a match for the search term.
+ """ + + def indicate_results( + self, line, line_num, path="undefined", workbox_id="undefined" + ): + """Writes a single line adding markup for any matches on the line.""" + tool_tip = "Open {} at line number {}".format(path, line_num) + for text, indicate in self.indicate_line(line): + # Print the margin text after the finder tells us if the line matches + if text is None: + self.callback_non_matching(self.margin(line_num, indicate)) + continue + + # Otherwise print the next section of the line text + if indicate: + self.callback_matching(text, workbox_id, line_num, tool_tip) + else: + self.callback_non_matching(text) + + def insert_lines(self, start, *lines, info): + """Inserts multiple lines adding links for any matching search terms. + + Args: + start (int): The line number of the first line to insert. + *lines (str): Each line to insert. They will be prefixed with line + numbers starting with start. + info (dict): Kwargs passed to indicate_results. + + Returns: + int: The line number of the last line that was inserted. + """ + for i, line in enumerate(lines): + # Note: The `+ 1` is due to line numbers being 1 based not zero based + self.indicate_results(line, start + i + 1, **info) + + return start + i + + def margin(self, line_num, match_found): + """Returns the margin text rendered and ready to print. + + Args: + line_num (int): The line number to show in the margin. + match_found (bool): Controls the indicator of if this line has any + matching text. If True then a `:` is inserted otherwise a space. + """ + match_indicator = ":" if match_found else " " + return self.margin_format.format( + line_num=line_num, match_indicator=match_indicator, padding=self._padding + ) + + @abc.abstractmethod + def matches(self, line): + """Returns bool for if find_text is contained in this line.""" + + def print_matching(self, text, workbox_id, line_num, tool_tip): + """Simple callback for `callback_matching` that prints text. 
+ + The print does not insert a newline character. + + Args: + text (str): The matching text to display. This will be inserted + into a markdown link as the link text. + workbox_id (str): From `GroupTabWidget.all_widgets`, the group_tab_index + and widget_tab_index joined by a comma without a space. Used as + the url of the link. Example: `3,1`. + line_num (int): The line number the url should navigate to. + tool_tip (str): Added as a title to the link to show up as a tool tip. + """ + href = ', {}, {}'.format(workbox_id, line_num) + print('[{}]({} "{}")'.format(text, href, tool_tip), end="") + + def print_non_matching(self, text): + """Simple callback for `callback_non_matching` that prints text. + + The print does not insert a newline character. + """ + print(text, end="") + + def search_text(self, text, path, workbox_id): + """Search each line of text for matching text and write the matches + including context lines. + + Args: + text (str): The text to search. + path (str): The workbox name this text represents. Should be the + Group_name and tab_name separated by a `/`. + workbox_id (str): From `GroupTabWidget.all_widgets`, the group_tab_index + and widget_tab_index joined by a comma without a space. Used as + the url of the link. Example: `3,1`. + """ + # NOTE: splitlines discards the "newline at end of file" so it doesn't + # show up in the final search results. + lines = text.splitlines(keepends=True) + + # Calculate the padding count so we can ensure all line numbers and gaps + # are consistently spaced in the margins. + self._padding = len(str(len(lines))) + + # Buffer to record up to context lines of text. This will be printed + # only if we find a match in the middle of the document. + # https://stackoverflow.com/a/52009859 + pre_history = deque(maxlen=self.context) + remaining_context_lines = 0 + + # last_insert keeps track of the last time we inserted a line. This lets + # us keep track of if there is a gap in output and we need to add dots.
+ last_insert = 0 + found = False + + for i, line in enumerate(lines): + info = dict(path=path, workbox_id=workbox_id) + if self.matches(line): + len_pre_history = len(pre_history) + if not found: + # Print the path on the first find + self.callback_non_matching("# File: ") + tool_tip = "Open {}".format(path) + self.callback_matching(path, workbox_id, 0, tool_tip) + self.callback_non_matching("\n") + found = True + elif i - last_insert - 1 - len_pre_history > 0: + # If there is a gap in output larger than context, insert dots + # for the width of the line numbers to indicate the gap. + self.callback_non_matching( + self.gap_format.format( + dot='.' * len(str(i)), padding=self._padding + ) + ) + # Add the pre-context lines followed by the matching line. + last_insert = self.insert_lines( + i - len_pre_history, *pre_history, line, info=info + ) + # Reset the pre-context history now that we have printed it. + pre_history.clear() + # Reset the post context line count so we will print the full + # context after this latest match if no other matches are found. + remaining_context_lines = self.context + else: + if remaining_context_lines > 0: + # Print any remaining context lines after we found a result + last_insert = self.insert_lines(i, line, info=info) + remaining_context_lines -= 1 + else: + # If we don't need to print any post context lines record + # this line into pre-context history so we can print it if + # we find a match on the next line. + # When deque reaches maxlen lines, it automatically evicts oldest + pre_history.append(line) + # Clear any cached match information the finder may have stored.
+ self.clear_cache() + + # Return if this file contained any matches + return found + + def title(self): + return '\nFind in workboxs: "{}"{}\n\n'.format(self.find_text, self.title_flags) + + @property + @abc.abstractmethod + def title_flags(self): + """Returns the text to show in the title for flags.""" + + +class RegexTextSearch(TextSearch): + """TextSearch that processes the text using regex.""" + + def __init__(self, find_text, case_sensitive=False, context=3): + super(RegexTextSearch, self).__init__( + find_text, case_sensitive, context=context + ) + self.pattern = re.compile(find_text, flags=0 if case_sensitive else re.I) + # Cache regex match objects between the `matches` call and `indicate_line` + # The key is the original line of text + self._matches = {} + + def clear_cache(self): + # Reset regex cache for the next call to `matches` + self._matches = {} + + def indicate_line(self, line): + # Check if this line is a match. + match = self._matches.get(line) + # Write the margin indicating if this line has any matches + yield None, bool(match) + + start = 0 + if match: + for m in match: + pre = line[start : m.start()] + if pre: + yield pre, False + yield line[m.start() : m.end()], True + start = m.end() + # Record the match + self.match_count += 1 + post = line[start:] + if post: + yield post, False + else: + yield line, False + + def matches(self, line): + self._matches[line] = list(self.pattern.finditer(line)) + return bool(self._matches[line]) + + @property + def title_flags(self): + if self.case_sensitive: + return " (regex, case sensitive)" + return " (regex)" + + +class SimpleTextSearch(TextSearch): + """A simple text matching finder that optionally considers case.""" + + def __init__(self, find_text, case_sensitive=False, context=3): + super(SimpleTextSearch, self).__init__( + find_text, case_sensitive, context=context + ) + # Assign the correct matching method based on the desired case setting + if case_sensitive: + self._matches = 
self._search_text_case_sensitive + else: + self._matches = self._search_text + find_text = self.find_text.lower() + # Preserve the original find_text value but cache the value needed internally + self._find_text = find_text + + def _search_text(self, line): + """Check for pattern ignoring case.""" + return self._find_text in line.lower() + + def _search_text_case_sensitive(self, line): + """Check for pattern matching case.""" + return self._find_text in line + + def indicate_line(self, line): + # Handle case sensitivity setting, ensuring return of the correct case + original_line = line + if not self.case_sensitive: + line = line.lower() + + find_len = len(self._find_text) + start = 0 + end = line.find(self._find_text) + # Write the margin indicating if this line has any matches + yield None, end != -1 + + # Write the text of the line with indications + while end != -1: + # insert prefix text + yield original_line[start:end], False + # insert indicated text preserving case + yield original_line[end : end + find_len], True + # Record the match + self.match_count += 1 + + # Check for any more matches in this line + start = end + find_len + end = line.find(self._find_text, start) + + # Include text at the end of the line + if end < find_len: + yield original_line[start:], False + + def matches(self, line): + return self._matches(line) + + @property + def title_flags(self): + if self.case_sensitive: + return " (case sensitive)" + return "" diff --git a/tests/find_files/re_greedy_False_0_True.md b/tests/find_files/re_greedy_False_0_True.md new file mode 100644 index 00000000..66f3f75c --- /dev/null +++ b/tests/find_files/re_greedy_False_0_True.md @@ -0,0 +1,16 @@ + +Find in workboxs: "search.+term" (regex) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + . + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + . 
+ 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + .. + 11: [search is in this line but the actual search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + .. + 17: This line has underscores around _[search term_ and "search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term in multiple times... search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term the search term with the search term](, 1,2, 19 "Open First Group/First Tab at line number 19") diff --git a/tests/find_files/re_greedy_False_2_True.md b/tests/find_files/re_greedy_False_2_True.md new file mode 100644 index 00000000..351c9d51 --- /dev/null +++ b/tests/find_files/re_greedy_False_2_True.md @@ -0,0 +1,25 @@ + +Find in workboxs: "search.+term" (regex) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + 8 line 8 + 9 line 9 + 10 line 10 + 11: [search is in this line but the actual search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + 13 + .. + 15 line 15 + 16 + 17: This line has underscores around _[search term_ and "search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term in multiple times... search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term the search term with the search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. 
diff --git a/tests/find_files/re_greedy_True_2_True.md b/tests/find_files/re_greedy_True_2_True.md new file mode 100644 index 00000000..4c99718d --- /dev/null +++ b/tests/find_files/re_greedy_True_2_True.md @@ -0,0 +1,25 @@ + +Find in workboxs: "search.+term" (regex, case sensitive) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1 Search term + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + 8 line 8 + 9 line 9 + 10 line 10 + 11: [search is in this line but the actual search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + 13 + .. + 15 line 15 + 16 + 17: This line has underscores around _[search term_ and "search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term in multiple times... search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: Search term the [search term with the search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. diff --git a/tests/find_files/re_greedy_upper_True_2_True.md b/tests/find_files/re_greedy_upper_True_2_True.md new file mode 100644 index 00000000..da713cf3 --- /dev/null +++ b/tests/find_files/re_greedy_upper_True_2_True.md @@ -0,0 +1,13 @@ + +Find in workboxs: "Search.+term" (regex, case sensitive) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + 2 line 2 + 3 search term + .. + 17 This line has underscores around _search term_ and "search term" has double quotes + 18 This line has the search term in multiple times... 
search term + 19: [Search term the search term with the search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. diff --git a/tests/find_files/re_simple_False_0_True.md b/tests/find_files/re_simple_False_0_True.md new file mode 100644 index 00000000..10a07f35 --- /dev/null +++ b/tests/find_files/re_simple_False_0_True.md @@ -0,0 +1,16 @@ + +Find in workboxs: "search term" (regex) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + . + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + . + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + .. + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + .. + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... 
[search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term](, 1,2, 19 "Open First Group/First Tab at line number 19") the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") diff --git a/tests/find_files/re_simple_False_2_True.md b/tests/find_files/re_simple_False_2_True.md new file mode 100644 index 00000000..337bcdd0 --- /dev/null +++ b/tests/find_files/re_simple_False_2_True.md @@ -0,0 +1,25 @@ + +Find in workboxs: "search term" (regex) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + 8 line 8 + 9 line 9 + 10 line 10 + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + 13 + .. + 15 line 15 + 16 + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term](, 1,2, 19 "Open First Group/First Tab at line number 19") the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. 
diff --git a/tests/find_files/re_simple_False_3_True.md b/tests/find_files/re_simple_False_3_True.md new file mode 100644 index 00000000..de201000 --- /dev/null +++ b/tests/find_files/re_simple_False_3_True.md @@ -0,0 +1,25 @@ + +Find in workboxs: "search term" (regex) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + 8 line 8 + 9 line 9 + 10 line 10 + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + 13 + 14 line 14 + 15 line 15 + 16 + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term](, 1,2, 19 "Open First Group/First Tab at line number 19") the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. 
diff --git a/tests/find_files/re_simple_True_2_True.md b/tests/find_files/re_simple_True_2_True.md new file mode 100644 index 00000000..51c4418b --- /dev/null +++ b/tests/find_files/re_simple_True_2_True.md @@ -0,0 +1,25 @@ + +Find in workboxs: "search term" (regex, case sensitive) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1 Search term + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + 8 line 8 + 9 line 9 + 10 line 10 + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + 13 + .. + 15 line 15 + 16 + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: Search term the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. diff --git a/tests/find_files/simple_False_0_False.md b/tests/find_files/simple_False_0_False.md new file mode 100644 index 00000000..b00f25f4 --- /dev/null +++ b/tests/find_files/simple_False_0_False.md @@ -0,0 +1,16 @@ + +Find in workboxs: "search term" + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + . + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + . 
+ 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + .. + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + .. + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term](, 1,2, 19 "Open First Group/First Tab at line number 19") the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") diff --git a/tests/find_files/simple_False_1_False.md b/tests/find_files/simple_False_1_False.md new file mode 100644 index 00000000..e86b8db6 --- /dev/null +++ b/tests/find_files/simple_False_1_False.md @@ -0,0 +1,22 @@ + +Find in workboxs: "search term" + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + .. + 10 line 10 + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + .. + 16 + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... 
[search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term](, 1,2, 19 "Open First Group/First Tab at line number 19") the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. diff --git a/tests/find_files/simple_False_2_False.md b/tests/find_files/simple_False_2_False.md new file mode 100644 index 00000000..571ac959 --- /dev/null +++ b/tests/find_files/simple_False_2_False.md @@ -0,0 +1,25 @@ + +Find in workboxs: "search term" + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + 8 line 8 + 9 line 9 + 10 line 10 + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + 13 + .. + 15 line 15 + 16 + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term](, 1,2, 19 "Open First Group/First Tab at line number 19") the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. 
diff --git a/tests/find_files/simple_False_3_False.md b/tests/find_files/simple_False_3_False.md new file mode 100644 index 00000000..aee89253 --- /dev/null +++ b/tests/find_files/simple_False_3_False.md @@ -0,0 +1,25 @@ + +Find in workboxs: "search term" + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1: [Search term](, 1,2, 1 "Open First Group/First Tab at line number 1") + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + 8 line 8 + 9 line 9 + 10 line 10 + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + 13 + 14 line 14 + 15 line 15 + 16 + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: [Search term](, 1,2, 19 "Open First Group/First Tab at line number 19") the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. 
diff --git a/tests/find_files/simple_True_2_False.md b/tests/find_files/simple_True_2_False.md new file mode 100644 index 00000000..d57b79fd --- /dev/null +++ b/tests/find_files/simple_True_2_False.md @@ -0,0 +1,25 @@ + +Find in workboxs: "search term" (case sensitive) + + +# File: [First Group/First Tab](, 1,2, 0 "Open First Group/First Tab") + 1 Search term + 2 line 2 + 3: [search term](, 1,2, 3 "Open First Group/First Tab at line number 3") + 4 + 5 line 5 + 6: [search term](, 1,2, 6 "Open First Group/First Tab at line number 6") at the start of the line + 7 line 7 + 8 line 8 + 9 line 9 + 10 line 10 + 11: search is in this line but the actual [search term](, 1,2, 11 "Open First Group/First Tab at line number 11") is in the middle + 12 + 13 + .. + 15 line 15 + 16 + 17: This line has underscores around _[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")_ and "[search term](, 1,2, 17 "Open First Group/First Tab at line number 17")" has double quotes + 18: This line has the [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") in multiple times... [search term](, 1,2, 18 "Open First Group/First Tab at line number 18") + 19: Search term the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") with the [search term](, 1,2, 19 "Open First Group/First Tab at line number 19") + 20 The "newline at end of file" is not printed due to str.splitlines. diff --git a/tests/find_files/tab_text.txt b/tests/find_files/tab_text.txt new file mode 100644 index 00000000..ef54b0ed --- /dev/null +++ b/tests/find_files/tab_text.txt @@ -0,0 +1,20 @@ +Search term +line 2 +search term + +line 5 +search term at the start of the line +line 7 +line 8 +line 9 +line 10 +search is in this line but the actual search term is in the middle + + +line 14 +line 15 + +This line has underscores around _search term_ and "search term" has double quotes +This line has the search term in multiple times... 
search term +Search term the search term with the search term +The "newline at end of file" is not printed due to str.splitlines. diff --git a/tests/find_files/test_find_files.py b/tests/find_files/test_find_files.py new file mode 100644 index 00000000..57344827 --- /dev/null +++ b/tests/find_files/test_find_files.py @@ -0,0 +1,74 @@ +import os + +import pytest + +from preditor.utils.text_search import RegexTextSearch, SimpleTextSearch + + +def text_for_test(filename): + dirname = os.path.dirname(__file__) + filename = os.path.join(dirname, filename) + with open(filename) as fle: + return fle.read() + + +@pytest.mark.parametrize( + "check_type,search_text,is_cs,context,is_re", + ( + # Simple text search testing context and case + ("simple", "search term", False, 0, False), + ("simple", "search term", False, 1, False), + ("simple", "search term", False, 2, False), + ("simple", "search term", False, 3, False), + ("simple", "search term", True, 2, False), + # Regex search testing context and case + ("re_simple", "search term", False, 0, True), + ("re_simple", "search term", False, 2, True), + ("re_simple", "search term", False, 3, True), + ("re_simple", "search term", True, 2, True), + # Complex regex with a greedy search term + ("re_greedy", "search.+term", False, 0, True), + ("re_greedy", "search.+term", False, 2, True), + ("re_greedy", "search.+term", True, 2, True), + ("re_greedy_upper", "Search.+term", True, 2, True), + ), +) +def test_find_files(capsys, check_type, search_text, is_cs, context, is_re): + workbox_id = "1,2" + path = 'First Group/First Tab' + text = text_for_test("tab_text.txt") + + if is_re: + TextSearch = RegexTextSearch + else: + TextSearch = SimpleTextSearch + + search = TextSearch(search_text, case_sensitive=is_cs, context=context) + # Add the title to the printed output so title is tested when checking + # `captured.out` later. 
+ print(search.title()) + + # Generate the search text and print it to `captured.out` so we can check + search.search_text(text, path, workbox_id) + + captured = capsys.readouterr() + check_filename = "{}_{}_{}_{}.md".format(check_type, is_cs, context, is_re) + check = text_for_test(check_filename) + + # To update tests, print text and save over top of the md. Then verify + # that it is actually rendered properly. You will need to add one trailing + # space after dot lines, two spaces after blank lines, and ensure the end of + # file newline is present. The default print callbacks use markdown links, + # but don't really render valid markdown. If you want to render to html, + # use regular markdown not github flavored. + # print(check_filename) + # print(captured.out) + + # print('*' * 50) + # for line in check.rstrip().splitlines(keepends=True): + # print([line]) + # print('*' * 50) + # for line in captured.out.splitlines(keepends=True): + # print([line]) + + assert captured.out == check