-
Notifications
You must be signed in to change notification settings - Fork 42
/
remove_metadata_from_pdf.py
31 lines (25 loc) · 1022 Bytes
/
remove_metadata_from_pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pikepdf
def remove_pdf_metadata(pdf_file, output_file):
    """
    Removes all metadata from a PDF file.

    Every key exposed by pikepdf's metadata interface for the document is
    deleted, and the result is written to ``output_file``. The input PDF is
    always closed, even if editing or saving fails.

    Args:
        pdf_file (str): The path to the input PDF file.
        output_file (str): The path to the output PDF file with metadata removed.

    Returns:
        None
    """
    # Open the PDF as a context manager so the file is closed even when
    # editing the metadata or saving the output raises an exception
    with pikepdf.Pdf.open(pdf_file) as pdf:
        # Set the editor flag to False to avoid adding pikepdf as the editor
        with pdf.open_metadata(set_pikepdf_as_editor=False) as meta:
            # Take a snapshot of the keys first, so entries are not deleted
            # from the collection that is currently being iterated over
            for key in list(meta.keys()):
                del meta[key]
        # Save the PDF file with the metadata removed, once the metadata
        # context has been exited
        pdf.save(output_file)
# Demo invocation: strip the metadata from 'example.pdf' and write the
# cleaned copy next to it as 'cleaned_example.pdf'
remove_pdf_metadata(
    pdf_file='files/example.pdf',
    output_file='files/cleaned_example.pdf',
)