Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Customizable destination folder structure #103

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ dependencies = [
"shtab",
"websockets>=10.1",
"babel",
"PyYAML",
"importlib_resources"
]

[project.scripts]
Expand Down
7 changes: 2 additions & 5 deletions pytr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,15 @@
import ssl
import requests
import websockets

from ecdsa import NIST256p, SigningKey
from ecdsa.util import sigencode_der
from http.cookiejar import MozillaCookieJar

from pytr.utils import get_logger
from pytr.app_path import *
Katzmann1983 marked this conversation as resolved.
Show resolved Hide resolved


home = pathlib.Path.home()
BASE_DIR = home / '.pytr'
CREDENTIALS_FILE = BASE_DIR / 'credentials'
KEY_FILE = BASE_DIR / 'keyfile.pem'
COOKIES_FILE = BASE_DIR / 'cookies.txt'


class TradeRepublicApi:
Expand Down
10 changes: 10 additions & 0 deletions pytr/app_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pathlib
Katzmann1983 marked this conversation as resolved.
Show resolved Hide resolved

# Per-user application directory: a '.pytr' folder inside the user's home directory.
home = pathlib.Path.home()
BASE_DIR = home / '.pytr'

# Files kept inside BASE_DIR.
# NOTE(review): presumably login/session state used by the API client — confirm against pytr/api.py.
CREDENTIALS_FILE = BASE_DIR / 'credentials'
KEY_FILE = BASE_DIR / 'keyfile.pem'
COOKIES_FILE = BASE_DIR / 'cookies.txt'

# YAML file that FileDestinationProvider loads to decide where downloaded documents are stored.
DESTINATION_CONFIG_FILE = BASE_DIR / 'file_destination_config.yaml'
Empty file added pytr/config/__init__.py
Empty file.
387 changes: 387 additions & 0 deletions pytr/config/file_destination_config__template.yaml

Large diffs are not rendered by default.

88 changes: 26 additions & 62 deletions pytr/dl.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
import re
import os

from concurrent.futures import as_completed
from pathlib import Path
from requests_futures.sessions import FuturesSession
from requests import session
from datetime import datetime

from pathvalidate import sanitize_filepath

from pytr.utils import preview, get_logger
from pytr.api import TradeRepublicError
from pytr.timeline import Timeline
from pytr.file_destination_provider import FileDestinationProvider

class DL:
def __init__(
self,
tr,
output_path,
filename_fmt,
file_destination_provider:FileDestinationProvider,
Katzmann1983 marked this conversation as resolved.
Show resolved Hide resolved
since_timestamp=0,
history_file='pytr_history',
max_workers=8,
Expand All @@ -25,13 +27,13 @@ def __init__(
'''
tr: api object
output_path: name of the directory where the downloaded files are saved
filename_fmt: format string to customize the file names
file_destination_provider: The destination provider for the file path and file names based on the event type and other parameters.
since_timestamp: downloaded files since this date (unix timestamp)
'''
self.tr = tr
self.output_path = Path(output_path)
self.history_file = self.output_path / history_file
self.filename_fmt = filename_fmt
self.file_destination_provider = file_destination_provider
self.since_timestamp = since_timestamp
self.universal_filepath = universal_filepath

Expand Down Expand Up @@ -83,75 +85,37 @@ async def dl_loop(self):
else:
self.log.warning(f"unmatched subscription of type '{subscription['type']}':\n{preview(response)}")

def dl_doc(self, doc, titleText, subtitleText, subfolder=None):
def dl_doc(self, doc, event_type: str, event_title: str, event_subtitle: str, section_title: str, timestamp: datetime):
'''
send asynchronous request, append future with filepath to self.futures
'''
doc_url = doc['action']['payload']
if subtitleText is None:
subtitleText = ''

try:
date = doc['detail']
iso_date = '-'.join(date.split('.')[::-1])
except KeyError:
date = ''
iso_date = ''
doc_id = doc['id']

# extract time from subtitleText
try:
time = re.findall('um (\\d+:\\d+) Uhr', subtitleText)
if time == []:
time = ''
else:
time = f' {time[0]}'
except TypeError:
time = ''

if subfolder is not None:
directory = self.output_path / subfolder
else:
directory = self.output_path

# If doc_type is something like 'Kosteninformation 2', then strip the 2 and save it in doc_type_num
doc_type = doc['title'].rsplit(' ')
if doc_type[-1].isnumeric() is True:
doc_type_num = f' {doc_type.pop()}'
else:
doc_type_num = ''

doc_type = ' '.join(doc_type)
titleText = titleText.replace('\n', '').replace('/', '-')
subtitleText = subtitleText.replace('\n', '').replace('/', '-')

filename = self.filename_fmt.format(
iso_date=iso_date, time=time, title=titleText, subtitle=subtitleText, doc_num=doc_type_num, id=doc_id
)

filename_with_doc_id = filename + f' ({doc_id})'

if doc_type in ['Kontoauszug', 'Depotauszug']:
filepath = directory / 'Abschlüsse' / f'{filename}' / f'{doc_type}.pdf'
filepath_with_doc_id = directory / 'Abschlüsse' / f'{filename_with_doc_id}' / f'{doc_type}.pdf'
else:
filepath = directory / doc_type / f'{filename}.pdf'
filepath_with_doc_id = directory / doc_type / f'{filename_with_doc_id}.pdf'
document_title = doc.get('title', '')


variables = {}
variables['iso_date'] = timestamp.strftime('%Y-%m-%d')
variables['iso_date_year'] = timestamp.strftime('%Y')
variables['iso_date_month'] = timestamp.strftime('%m')
variables['iso_date_day'] = timestamp.strftime('%d')
variables['iso_time'] = timestamp.strftime('%H-%M')

filepath = self.file_destination_provider.get_file_path(event_type, event_title, event_subtitle, section_title, document_title, variables)
if filepath.endswith('.pdf') is False:
filepath = f'{filepath}.pdf'

filepath = Path(os.path.join( self.output_path , filepath))

if self.universal_filepath:
filepath = sanitize_filepath(filepath, '_', 'universal')
filepath_with_doc_id = sanitize_filepath(filepath_with_doc_id, '_', 'universal')
else:
filepath = sanitize_filepath(filepath, '_', 'auto')
filepath_with_doc_id = sanitize_filepath(filepath_with_doc_id, '_', 'auto')

Katzmann1983 marked this conversation as resolved.
Show resolved Hide resolved

if filepath in self.filepaths:
self.log.debug(f'File {filepath} already in queue. Append document id {doc_id}...')
if filepath_with_doc_id in self.filepaths:
self.log.debug(f'File {filepath_with_doc_id} already in queue. Skipping...')
return
else:
filepath = filepath_with_doc_id
self.log.debug(f'File {filepath} already in queue. Skipping...')
return

doc['local filepath'] = str(filepath)
self.filepaths.append(filepath)

Expand Down
185 changes: 185 additions & 0 deletions pytr/file_destination_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import os
import re
import shutil
import pytr.config

from importlib_resources import files
from yaml import safe_load
from pathlib import Path
from pytr.app_path import *
from pytr.utils import get_logger

# TODO: Decide whether to use LibYAML (CLoader/CDumper): it is faster than the pure-Python implementation but adds another dependency.
try:
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper


# Key of the destination entry whose filename format is used as the default
# for every destination that does not define its own filename.
ALL_CONFIG = "all"
# Key of the fallback destination entry, used when no destination or more than one destination matches.
UNKNOWN_CONFIG = "unknown"

# File name of the packaged config template (in pytr.config) that is copied to
# the user's config location when no destination config file exists yet.
TEMPLATE_FILE_NAME ="file_destination_config__template.yaml"


class DefaultFormateValue(dict):
    '''
    Dict for use with str.format_map that keeps unknown placeholders intact.

    Looking up a key that is not present yields the key wrapped in braces
    (e.g. 'name' -> '{name}') instead of raising KeyError.
    '''

    def __missing__(self, key):
        # Rebuild the original placeholder text for names we have no value for.
        return "{" + key + "}"


class DestinationConfig:
    '''
    One named destination entry of the destination config.

    Attributes are stored exactly as passed in:
    config_name: name (key) of the entry
    filename: filename format string, may be None
    path: path format string, may be None
    pattern: list of Pattern objects that select this destination, may be None
    '''

    def __init__(self, config_name: str, filename: str, path: str = None, pattern: list = None):
        self.config_name, self.filename = config_name, filename
        self.path, self.pattern = path, pattern


class Pattern:
    '''
    A set of per-field match expressions belonging to a destination entry.

    Every constructor argument is stored unchanged under an attribute of the
    same name; a value of None means the field is not constrained.
    '''

    def __init__(self, event_type: str, event_subtitle: str, event_title: str, section_title: str, document_title: str):
        fields = (
            ("event_type", event_type),
            ("event_subtitle", event_subtitle),
            ("event_title", event_title),
            ("section_title", section_title),
            ("document_title", document_title),
        )
        for field_name, field_value in fields:
            setattr(self, field_name, field_value)


class FileDestinationProvider:
    '''
    Resolves the destination path and file name of a document from the
    user's destination config (a YAML file with a top-level 'destination' mapping).
    '''

    def __init__(self):
        '''
        A provider for file path and file names based on the event type and other parameters.

        Loads the config from DESTINATION_CONFIG_FILE; if that file does not
        exist it is first created from the packaged template.

        Raises:
        ValueError: If the config file is empty or structurally invalid.
        '''
        self._log = get_logger(__name__)

        config_file_path = Path(DESTINATION_CONFIG_FILE)
        if not config_file_path.is_file():
            self.__create_default_config(config_file_path)

        # Context manager so the file handle is released even if parsing fails.
        with open(config_file_path, "r", encoding="utf8") as config_file:
            destination_config = safe_load(config_file)

        self.__validate_config(destination_config)

        destinations = destination_config["destination"]

        # Validation guarantees that both special entries exist.
        self._all_file_config = DestinationConfig(
            ALL_CONFIG, destinations[ALL_CONFIG]["filename"])
        unknown = destinations[UNKNOWN_CONFIG]
        self._unknown_file_config = DestinationConfig(
            UNKNOWN_CONFIG, unknown["filename"], unknown["path"])

        self._destination_configs: list[DestinationConfig] = []
        for config_name, entry in destinations.items():
            if config_name in (ALL_CONFIG, UNKNOWN_CONFIG):
                continue
            self._destination_configs.append(DestinationConfig(
                config_name,
                entry.get("filename", None),
                entry.get("path", None),
                self.__extract_pattern(entry.get("pattern", None))))

    def get_file_path(self, event_type: str, event_title: str, event_subtitle: str, section_title: str, document_title: str, variables: dict) -> str:
        '''
        Get the file path based on the event type and other parameters.

        A destination is selected when at least one of its patterns matches
        all given (non-None) parameters. If no destination or more than one
        destination matches, the 'unknown' destination is used.

        Parameters:
        event_type (str): The event type
        event_title (str): The event title
        event_subtitle (str): The event subtitle
        section_title (str): The section title
        document_title (str): The document title
        variables (dict): The variables->value dict to be used in the file path and file name format.
            Note: the given (non-None) parameters are added to this dict in place.

        Returns:
        str: The formatted path (destination path joined with the file name).
        '''
        # Only parameters that were actually supplied take part in matching.
        criteria = {
            key: value
            for key, value in (
                ("event_type", event_type),
                ("event_title", event_title),
                ("event_subtitle", event_subtitle),
                ("section_title", section_title),
                ("document_title", document_title),
            )
            if value is not None
        }
        variables.update(criteria)

        matching_configs = [
            config for config in self._destination_configs
            if self.__is_matching_config(config, criteria)
        ]

        if len(matching_configs) == 0:
            self._log.debug(
                f"No destination config found for the given parameters: event_type:{event_type}, event_title:{event_title},event_subtitle:{event_subtitle},section_title:{section_title},document_title:{document_title}")
            return self.__create_file_path(self._unknown_file_config, variables)

        if len(matching_configs) > 1:
            self._log.debug(f"Multiple Destination Patterns where found. Using 'unknown' config! Parameter: event_type:{event_type}, event_title:{event_title},event_subtitle:{event_subtitle},section_title:{section_title},document_title:{document_title}")
            return self.__create_file_path(self._unknown_file_config, variables)

        return self.__create_file_path(matching_configs[0], variables)

    def __is_matching_config(self, config: "DestinationConfig", criteria: dict) -> bool:
        '''
        Return True if at least one pattern of the config matches all criteria.

        All criteria are checked against the same pattern, so a config is
        not selected just because different patterns each match a different field.
        A config without patterns never matches.
        '''
        for pattern in config.pattern or ():
            if self.__is_matching_pattern(pattern, criteria):
                return True

        return False

    @staticmethod
    def __is_matching_pattern(pattern, criteria: dict) -> bool:
        '''
        Return True if every criterion satisfies the pattern.

        A pattern field that is None places no constraint on that criterion;
        otherwise the field is used as a regular expression matched at the
        start of the value (re.match).
        '''
        for key, value in criteria.items():
            expression = getattr(pattern, key)
            if expression is not None and not re.match(expression, value):
                return False

        return True

    def __create_file_path(self, config: "DestinationConfig", variables: dict):
        '''
        Join path and file name of the config and fill in the variables.

        If the config has no file name, the file name of the 'all' config is
        used. Placeholders without a value in variables are left as they are.
        '''
        formate_variables = DefaultFormateValue(variables)

        path = config.path
        filename = config.filename
        if filename is None:
            filename = self._all_file_config.filename

        return os.path.join(path, filename).format_map(formate_variables)

    def __extract_pattern(self, pattern_config: list) -> list:
        '''
        Convert the 'pattern' list of a destination entry into Pattern objects.

        A missing (None) or empty pattern list results in an empty list.
        '''
        if not pattern_config:
            return []

        return [
            Pattern(pattern.get("event_type", None),
                    pattern.get("event_subtitle", None),
                    pattern.get("event_title", None),
                    pattern.get("section_title", None),
                    pattern.get("document_title", None))
            for pattern in pattern_config
        ]

    def __validate_config(self, destination_config: dict):
        '''
        Check the structure of the loaded destination config.

        Raises:
        ValueError: If the 'destination' key is missing (this includes an empty
            config file), if the 'all' or 'unknown' entry is incomplete, or if
            any other entry has no path.
        '''
        # safe_load returns None for an empty file, so check the type first.
        if not isinstance(destination_config, dict) or "destination" not in destination_config:
            raise ValueError("'destination' key not found in config file")

        destinations = destination_config["destination"]
        if not isinstance(destinations, dict):
            raise ValueError("'destination' in config file is not a mapping")

        # Check if default config is present
        all_entry = destinations.get(ALL_CONFIG)
        if not isinstance(all_entry, dict) or "filename" not in all_entry:
            raise ValueError(
                "'all' config not found or filename not not present in default config")

        unknown_entry = destinations.get(UNKNOWN_CONFIG)
        if not isinstance(unknown_entry, dict) or "filename" not in unknown_entry or "path" not in unknown_entry:
            raise ValueError(
                "'unknown' config not found or filename/path not not present in unknown config")

        for config_name, entry in destinations.items():
            if config_name == ALL_CONFIG:
                continue
            if not isinstance(entry, dict) or "path" not in entry:
                raise ValueError(
                    f"'{config_name}' has no path defined in destination config")

    def __create_default_config(self, config_file_path: Path):
        '''
        Create the config file from the template packaged in pytr.config.
        '''
        template = files(pytr.config).joinpath(TEMPLATE_FILE_NAME)
        # The application directory may not exist yet on a first run.
        config_file_path.parent.mkdir(parents=True, exist_ok=True)
        # Read through the resource API so this also works when the package
        # resource is not a plain file on disk.
        config_file_path.write_bytes(template.read_bytes())
Loading