This repository has been archived by the owner on Nov 19, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build-hkt-day-lists.py
221 lines (202 loc) · 7.71 KB
/
build-hkt-day-lists.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import io
import pathlib
import typing as t
import google.oauth2.credentials
import google.oauth2.service_account
import googleapiclient.errors # type: ignore
import googleapiclient.http # type: ignore
import pandas as pd
import pdfkit # type: ignore
from docx import Document
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.shared import Inches
from googleapiclient.discovery import build # type: ignore
HKT_FILE_NAME = "HomeKitaTage.xlsx"
HKT_FILE_PATH = pathlib.Path(f"/tmp/{HKT_FILE_NAME}")
SCOPES = ["https://www.googleapis.com/auth/drive"]
SERVICE_ACCOUNT_FILE = pathlib.Path("/tmp/elternvertretung-b7713037bac6.json")
if not SERVICE_ACCOUNT_FILE.is_file():
SERVICE_ACCOUNT_FILE = pathlib.Path(
"/data/elternvertretung-b7713037bac6.json"
)
CREDENTIALS = (
google.oauth2.service_account.Credentials.from_service_account_file(
SERVICE_ACCOUNT_FILE, scopes=SCOPES
)
)
def dataframe_to_word(df, docx_file_path):
document = Document()
# Set custom margins (e.g., 0.5 inches for top and bottom)
sections = document.sections
for section in sections:
section.top_margin = Inches(0.5)
section.bottom_margin = Inches(0.5)
# Add a table with borders
table = document.add_table(rows=1, cols=len(df.columns))
table.style = "Table Grid" # Use a built-in style with borders
# Add header row
hdr_cells = table.rows[0].cells
for i, column in enumerate(df.columns):
hdr_cells[i].text = str(column)
# Add data rows
for index, row in df.iterrows():
row_cells = table.add_row().cells
for i, value in enumerate(row):
row_cells[i].text = str(value)
# Apply borders to each cell (if needed)
for row in table.rows:
for cell in row.cells:
cell._element.get_or_add_tcPr().append(
parse_xml(r'<w:shd {} w:fill="FFFFFF"/>'.format(nsdecls("w")))
)
cell._element.get_or_add_tcPr().append(
parse_xml(
(
r'<w:tcBorders %s><w:top w:val="single" w:sz="4"/>'
r'<w:left w:val="single" w:sz="4"/>'
r'<w:bottom w:val="single" w:sz="4"/>'
r'<w:right w:val="single" w:sz="4"/>'
r"</w:tcBorders>"
)
% nsdecls("w")
)
)
document.save(docx_file_path)
def export_excel_file_from_google_drive(filename) -> None:
HKT_FILE_PATH.unlink(missing_ok=True)
file: t.Optional[io.BytesIO] = None
try:
service = build("drive", "v3", credentials=CREDENTIALS)
results = (
service.files()
.list(fields="nextPageToken, files(id, name)")
.execute()
)
items = results.get("files", [])
if not items:
print("No files found.")
for item in items:
if item["name"] != filename:
continue
file_id = item["id"]
request = service.files().get_media(fileId=file_id)
file = io.BytesIO()
downloader = googleapiclient.http.MediaIoBaseDownload(
file, request
)
done = False
while done is False:
status, done = downloader.next_chunk()
HKT_FILE_PATH.write_bytes(file.getvalue())
if not HKT_FILE_PATH.is_file():
print(f"File {filename} not found.")
except googleapiclient.errors.HttpError as error:
print(f"An error occurred: {error}")
def upload_overview_files_to_google_drive(
daily_overview_file_paths: list[pathlib.Path],
) -> None:
try:
service = build("drive", "v3", credentials=CREDENTIALS)
existing_files = (
service.files()
.list(fields="nextPageToken, files(id, name)")
.execute()
).get("files", [])
for file_path in daily_overview_file_paths:
if existing_files:
for existing_file in existing_files:
if existing_file["name"] == file_path.stem:
service.files().delete(
fileId=existing_file["id"]
).execute()
existing_files.remove(existing_file)
break
file_metadata = {
"name": file_path.stem,
"parents": ["19PV7rVVVA1uPS-LIDU5bdHGYvz3lODXB"],
}
media = googleapiclient.http.MediaFileUpload(
file_path, chunksize=-1
)
file = (
service.files()
.create(
body=file_metadata,
media_body=media,
fields="id,name,webViewLink",
)
.execute()
)
print(f"Uploaded {file.get('name')} to {file.get('webViewLink')}.")
# print(f'Link: {file.get("webViewLink")}')
# permission = {
# "type": "user",
# "role": "writer",
# "emailAddress": "[email protected]"
# }
# # https://developers.google.com/drive/api/reference/rest/v3/permissions/create
# service.permissions().create(
# fileId=file.get("id"),
# body=permission,
# transferOwnership=False,
# sendNotificationEmail=False,
# supportsAllDrives=True,
# # moveToNewOwnersRoot=True,
# ).execute()
except googleapiclient.errors.HttpError as error:
print(f"An error occurred: {error}")
def generate_daily_overview_files() -> list[pathlib.Path]:
daily_overview_file_paths: list[pathlib.Path] = []
df = pd.read_excel(HKT_FILE_PATH, sheet_name="HKT Erfassung")
for group_name, group_df in df.groupby("Group"):
for no, day in enumerate(
(
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
),
start=1,
):
day_df = group_df[
(group_df[f"{day}\nmorning"] == 1.0)
| (group_df[f"{day}\nafternoon"] == 1.0)
]
day_df = day_df.replace(1.0, "Stay at home")
html_file_path = pathlib.Path(f"/tmp/{group_name}_{no}_{day}.html")
df = day_df[
[
"Name",
"Group",
f"{day}\nmorning",
f"{day}\nafternoon",
]
].fillna("")
df.to_html(html_file_path, index=False)
pdf_file_path = html_file_path.with_suffix(".pdf")
options = {"encoding": "UTF-8", "user-style-sheet": "style.css"}
pdfkit.from_file(
input=str(html_file_path),
output_path=str(pdf_file_path),
options=options,
verbose=False,
)
docx_file_path = html_file_path.with_suffix(".docx")
dataframe_to_word(df, docx_file_path)
# html_file_path.unlink(missing_ok=True)
for file_path in (
pdf_file_path,
docx_file_path,
):
if file_path.is_file():
daily_overview_file_paths.append(file_path)
return daily_overview_file_paths
if __name__ == "__main__":
export_excel_file_from_google_drive(filename=HKT_FILE_NAME)
if not HKT_FILE_PATH.is_file():
print(f"File {HKT_FILE_NAME} not found.")
raise SystemExit(1)
daily_overview_file_paths = generate_daily_overview_files()
upload_overview_files_to_google_drive(daily_overview_file_paths)