-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 21d663b
Showing
3 changed files
with
139 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/tmp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# REMAP DEMAIL File Classifier | ||
|
||
`remap-demail-classifer` is a command line application written in | ||
Python3. It classifies fetched DEMAIL attachment files using a regex | ||
specification. | ||
|
||
The application has no requirements to run other than Python 3.6 | ||
or newer. | ||
|
||
CLI arguments are as follows: | ||
|
||
``` | ||
./remap-demail-classifer <SPEC-FILE> <PREFIX-REGEX> <IGNORE-FILE> <DEMAIL-DIR> | ||
``` | ||
|
||
Example: | ||
|
||
``` | ||
./remap-demail-classifer spec.csv \ | ||
'^(?P<recdate>[0-9]{4}\-[0-9]{2}\-[0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}Z_[A-Z0-9]{32}_[A-Z0-9]{32}_' \ | ||
tmp/ignore.dat \ | ||
/data/remap/tenants/deployment/demail/downloaded | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import csv | ||
import re | ||
import sys | ||
from pathlib import Path | ||
from typing import Iterable, List, NamedTuple, Optional, Pattern | ||
|
||
|
||
#: Application version. | ||
__version__ = "0.0.1.dev0" | ||
|
||
|
||
class Spec(NamedTuple):
    """
    A single classification rule.

    Bundles a compiled filename pattern with the template used to build
    the target filename and the sub directory the target file goes into.
    """

    #: Compiled regular expression matched against the original filename.
    pattern: Pattern

    #: Format string the target filename is rendered from.
    template: str

    #: Sub directory the target file is placed in.
    subdir: Path
|
||
|
||
def classify(parent: Path, specs: "List[Spec]", path: Path) -> Optional[Path]:
    """
    Attempts to classify the given path as per given specs.

    The stripped filename of ``path`` is matched against the pattern of
    each spec, in order. The first spec whose pattern matches *and* whose
    template can be rendered from the pattern's named groups decides the
    result.

    :param parent: Directory the target path is built under.
    :param specs: Specs to try, in order.
    :param path: Path of the file to classify; only its name is used.
    :return: ``parent / spec.subdir / <rendered template>`` for the first
        usable spec, ``None`` if no spec applies.
    """
    ## Get and trim filename:
    filename = path.name.strip()

    ## Iterate over specs:
    for spec in specs:
        ## Attempt to match, skip the spec if it does not:
        match = spec.pattern.match(filename)
        if match is None:
            continue

        ## Attempt to compile the target filename from the named groups:
        try:
            newfilename = spec.template.format(**match.groupdict())
        except (KeyError, IndexError) as err:
            ## The template refers to a field the pattern does not
            ## capture. Report it and try the next spec; falling through
            ## to the return below would fail on an unbound name.
            print(err)
            print(path)
            continue

        ## Build and return the target path:
        return parent / spec.subdir / newfilename

    ## We do not have any usable match, return None:
    return None
|
||
|
||
def compile_specs(path: Path, prefix: str) -> List[Spec]:
    """
    Reads given CSV file and compiles specs.

    The file is read with a header row; each data row must provide the
    ``pattern``, ``template`` and ``directory`` columns. ``prefix`` is
    prepended to every pattern before it is compiled.

    :param path: Path to the CSV specification file.
    :param prefix: Regular expression prepended to each row's pattern.
    :return: One spec per data row, in file order.
    :raises KeyError: If one of the expected columns is missing.
    :raises re.error: If a prefixed pattern is not a valid regex.
    """
    ## The csv module does its own newline handling, so the file has to
    ## be opened with ``newline=""``; otherwise newlines embedded in
    ## quoted fields are not interpreted correctly.
    with path.open(newline="") as cfile:
        ## Read the CSV, compile patterns and build specs:
        return [
            Spec(
                re.compile(prefix + row["pattern"]),
                row["template"],
                Path(row["directory"]),
            )
            for row in csv.DictReader(cfile)
        ]
|
||
|
||
def main(
    specfile: Path, prefix: str, ignorepath: Path, parent: Path, paths: Iterable[Path]
) -> None:
    """
    Classifies each given path and prints the outcome.

    For every path either ``IGNORE: <path>`` is printed (when its
    filename is listed in the ignore file) or ``<path> -> <target>``.

    :param specfile: CSV file holding the specs.
    :param prefix: Regular expression prepended to each spec pattern.
    :param ignorepath: Text file with one ignored filename per line.
    :param parent: Directory the target paths are built under.
    :param paths: Paths to classify.
    :raises Exception: On the first path that is neither ignored nor
        classified by any spec.
    """
    ## Specs first, then the ignore list:
    specs = compile_specs(specfile, prefix)

    ## Collect the non-blank, trimmed lines of the ignore file:
    ignores = set()
    for line in ignorepath.read_text().split("\n"):
        name = line.strip()
        if name:
            ignores.add(name)

    ## Walk over the paths:
    for path in paths:
        ## Ignored files are reported and skipped:
        if path.name in ignores:
            print(f"IGNORE: {path}")
            continue

        ## Everything else must be classifiable:
        target = classify(parent, specs, path)
        if target is None:
            raise Exception(f"Path could not be classified. Filename: {path.name}")

        ## Report the mapping:
        print(f"{path} -> {target}")
|
||
|
||
if __name__ == "__main__":
    ## Fail with a usage message rather than an IndexError traceback when
    ## required arguments are missing:
    if len(sys.argv) < 5:
        sys.exit(
            f"Usage: {sys.argv[0]} <SPEC-FILE> <PREFIX-REGEX> <IGNORE-FILE> <DEMAIL-DIR>"
        )

    main(
        Path(sys.argv[1]),
        sys.argv[2],
        Path(sys.argv[3]),
        ## Target paths are always built under this fixed root:
        Path("/files/"),
        ## Only regular files directly inside the given directory:
        (i for i in Path(sys.argv[4]).iterdir() if i.is_file()),
    )