-
Notifications
You must be signed in to change notification settings - Fork 1
/
regex_search.py
executable file
·98 lines (83 loc) · 2.95 KB
/
regex_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
#
# regex_search.py
# Usage:
# regex_search.py /path/to/textfiles/ expression
#
# If the user includes a path to search, we search that path, otherwise
# we search the current working directory. Only files that python-magic
# identifies as plain text (MIME type "text/plain") are searched,
# regardless of their file extension.
#
# Automate the boring stuff with python, chapter 9 exercise:
#
# Write a program that opens all .txt files in a folder
# and searches for any line that matches a user-supplied
# regular expression. The results should be printed
# to the screen.
# Imports
import os
import re
import sys
# Import the modules the rest of the script depends on. If one is missing,
# tell the user how to install it and stop: carrying on would only fail later
# with a NameError at the first use of Path / magic.
try:
    from pathlib import Path
except ImportError:
    print('''This program requires pathlib to be installed:
'pip install pathlib'
''')
    sys.exit(1)
try:
    import magic
except ImportError:
    print('''This program requires Python-magic to be installed:
'pip install Python-magic'
''')
    sys.exit(1)
# File-type detector; with mime=True it reports MIME type strings
# (the script later compares them against "text/plain").
mime = magic.Magic(mime=True)
# Remember the launch directory so the script can chdir back to it at the end.
previous_directory = Path.cwd()
# Handle cli arguments
# Accepted invocations:
#   regex_search.py "expression"        -> search the current directory
#   regex_search.py path "expression"   -> search the given directory
numArgs = len(sys.argv)
enoughArgs = numArgs >= 2
# More than [script, path, expression] usually means an unquoted expression;
# refuse it rather than silently treating the path argument as the regex.
tooManyArgs = numArgs > 3
helpRequested = enoughArgs and sys.argv[1].lower() in ('?', 'help', '--help', '-h', '-help', 'h')
if not enoughArgs or tooManyArgs or helpRequested:
    print('''
Usage:
regex_search.py [path] "expression"
Path is optional. If no path is included, the current
working directory is searched. Note: the regular expression
searched for must be surrounded by quotes to parse correctly.
''')
    # Asking for help is a success; a malformed command line is not.
    sys.exit(0 if helpRequested else 1)
# Parse sys.argv for path and regex pattern
includesPath = numArgs == 3  # exactly [script, path, expression]
# Joining onto cwd leaves an absolute argument unchanged and anchors a
# relative one to the directory the script was launched from.
path = (Path.cwd() / sys.argv[1]) if includesPath else Path.cwd()
regex = sys.argv[2] if includesPath else sys.argv[1]
# Validate path
# exists() alone would accept a regular file, and the later os.chdir(path)
# would then raise; require an actual directory and report failure through
# a non-zero exit status.
if not path.is_dir():
    print("Invalid input: Directory does not exist. Exiting.")
    sys.exit(1)
# Validate user regex
# Compile once up front so a bad pattern is rejected before any file is
# opened; the compiled object is reused for every line searched.
try:
    userRegex = re.compile(regex)
except re.error:
    print("Invalid regex pattern. Exiting.")
    # Non-zero status so callers can tell the search never ran.
    sys.exit(1)
# Generator function to yield only files, not directories
def files(path):
    """Yield the names of the regular files directly inside *path*.

    Sub-directories and other non-file entries are skipped and the listing
    does not recurse. Base names (not full paths) are yielded in sorted
    order so results are reported deterministically; os.listdir() on its
    own returns entries in arbitrary order.

    Args:
        path: Directory to list (str or path-like).

    Yields:
        str: Base name of each regular file found in *path*.
    """
    for name in sorted(os.listdir(path)):
        if os.path.isfile(os.path.join(path, name)):
            yield name
# work within the directory itself.
os.chdir(path)
try:
    # Keep only the files python-magic reports as plain text; the bare file
    # names resolve correctly because the cwd is now the search directory.
    text_files = [name for name in files(Path.cwd()) if mime.from_file(name) == "text/plain"]
    if not text_files:
        print(f"There are no text files to search in {path}. Exiting")
        sys.exit()
    for filename in text_files:
        # errors='replace' keeps the search going over plain-text files that
        # are not valid UTF-8 instead of aborting with UnicodeDecodeError.
        with open(path / filename, encoding='utf_8', errors='replace') as file:
            # Stream line by line (1-based numbering) rather than loading the
            # whole file into memory with readlines().
            for line_number, line in enumerate(file, start=1):
                match = userRegex.search(line)
                if match is not None:
                    print(f"Pattern matched in file '{filename}' on line [{line_number}]:> '{match.group()}'")
finally:
    # go back to the original directory, even on the early exit or an error
    os.chdir(previous_directory)