-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #47 from stfc/create_misc_directory
Create directory for unrelated tools
- Loading branch information
Showing
3 changed files
with
227 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# A collection of unrelated utils and tools | ||
|
||
## txt2pdf.py [here](txt2pdf.py) | ||
|
||
### Description | ||
* Primary purpose: generate pdf docs for the period CLOUD reports. | ||
* Converts ascii text into a pdf document. | ||
* If the input file contains basic markdown tags, they are recognised and processed accordingly. | ||
* Tables | ||
* Titles | ||
* bold face | ||
* Each title starts on a new page. | ||
* A title page is created, with the current date. | ||
* Execute "txt2pdf.py -h" for help on the input options. | ||
|
||
### Installation and Setup | ||
* **dependency**:reportlab | ||
|
||
### TO-DO and Whishlist | ||
* Improve quality of the code, make it more modular | ||
* Improve the -h/--help message to explain better the different combinations of input options | ||
* Add CLOUD-specific code, to enhance the content of the generated document. For example: some **warning** or **critical** alert when some numbers are not good. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import os | ||
import re | ||
from datetime import datetime | ||
|
||
from reportlab.lib import colors | ||
from reportlab.lib.enums import TA_CENTER # For center alignment of text | ||
from reportlab.lib.pagesizes import LETTER | ||
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet | ||
from reportlab.platypus import ( | ||
SimpleDocTemplate, | ||
Paragraph, | ||
Spacer, | ||
Table, | ||
TableStyle, | ||
PageBreak, | ||
) | ||
|
||
def parse_markdown(input_text): | ||
""" | ||
Parses the input markdown text and converts it into a list of ReportLab flowables. | ||
:param input_text: The input text containing markdown-like syntax. | ||
:type input_text: str | ||
:return: A list of ReportLab flowables (text, tables, etc.) to be included in the PDF. | ||
:rtype: list | ||
""" | ||
flowables = [] # List to store elements to be added to the PDF | ||
styles = getSampleStyleSheet() # Get default styles provided by ReportLab | ||
|
||
# Define custom styles for different heading levels | ||
title_styles = { | ||
f'Heading{level}': ParagraphStyle( | ||
f'Heading{level}', | ||
parent=styles['Heading1' if level == 1 else 'Normal'], | ||
fontSize=24 - (level - 1) * 2, # Font size decreases with level | ||
leading=28 - (level - 1) * 2, # Line height decreases with level | ||
spaceAfter=12, # Space after the heading | ||
) | ||
for level in range(1, 7) # Define styles for headings 1 through 6 | ||
} | ||
|
||
normal_style = styles['Normal'] # Default style for regular paragraphs | ||
|
||
lines = input_text.split('\n') | ||
i = 0 | ||
while i < len(lines): | ||
line = lines[i].strip() | ||
if not line: | ||
i += 1 | ||
continue | ||
|
||
# Handle headings defined by # symbols | ||
title_match = re.match(r'^(#{1,6})\s+(.*)', line) # Match lines like "# Heading" | ||
if title_match: | ||
level = len(title_match.group(1)) # Number of # symbols defines the level | ||
text = title_match.group(2) # Extract the heading text | ||
style = title_styles[f'Heading{level}'] # Select the appropriate style | ||
if level == 1 and flowables: # Insert a page break for new sections | ||
flowables.append(PageBreak()) | ||
paragraph = Paragraph(process_inline(text), style) # Create a styled paragraph | ||
flowables.append(paragraph) | ||
flowables.append(Spacer(1, 12)) # Add space after the heading | ||
i += 1 | ||
continue | ||
|
||
# Handle tables (lines containing '|') | ||
if '|' in line: | ||
table_lines = [] | ||
while i < len(lines) and '|' in lines[i]: # Continue until the table ends | ||
table_lines.append(lines[i]) | ||
i += 1 | ||
table = parse_table(table_lines) # Parse the table lines | ||
if table: | ||
flowables.append(table) # Add the table to the flowables | ||
flowables.append(Spacer(1, 12)) # Add space after the table | ||
continue | ||
|
||
# Handle regular paragraphs | ||
paragraph = Paragraph(process_inline(line), normal_style) | ||
flowables.append(paragraph) | ||
flowables.append(Spacer(1, 12)) # Add space after the paragraph | ||
i += 1 | ||
|
||
return flowables | ||
|
||
def process_inline(text): | ||
""" | ||
Processes inline markdown syntax like bold (**text**) and italic (*text*). | ||
:param text: The input text with inline markdown syntax. | ||
:type text: str | ||
:return: Text with ReportLab-compatible HTML tags for styling. | ||
:rtype: str | ||
""" | ||
bold_pattern = r'(\*\*|__)(.*?)\1' # Pattern for bold (** or __) | ||
italic_pattern = r'(\*|_)(.*?)\1' # Pattern for italic (* or _) | ||
|
||
# Replace bold markdown with <b> tags | ||
def bold_repl(match): | ||
return f'<b>{match.group(2)}</b>' | ||
|
||
# Replace italic markdown with <i> tags | ||
def italic_repl(match): | ||
return f'<i>{match.group(2)}</i>' | ||
|
||
text = re.sub(bold_pattern, bold_repl, text) # Apply bold replacements | ||
text = re.sub(italic_pattern, italic_repl, text) # Apply italic replacements | ||
|
||
return text | ||
|
||
def parse_table(table_lines): | ||
""" | ||
Parses markdown table lines into a ReportLab Table object. | ||
:param table_lines: List of strings representing the table in markdown format. | ||
:type table_lines: list | ||
:return: A styled ReportLab Table object. | ||
:rtype: Table | ||
""" | ||
if len(table_lines) < 2: # A valid table requires at least a header and one row | ||
return None | ||
|
||
# Extract headers from the first line (ignoring leading and trailing |) | ||
headers = [cell.strip() for cell in re.split(r'\|', table_lines[0])[1:-1]] | ||
data = [headers] # Initialize table data with headers | ||
|
||
# Extract data rows (skipping the separator line) | ||
for line in table_lines[2:]: | ||
cells = [cell.strip() for cell in re.split(r'\|', line)[1:-1]] | ||
if len(cells) == len(headers): # Ensure row length matches header length | ||
data.append(cells) | ||
|
||
if not data: | ||
return None | ||
|
||
# Create the table with centered alignment | ||
tbl = Table(data, hAlign='CENTER') | ||
|
||
# Apply table styles | ||
tbl_style = TableStyle([ | ||
('BACKGROUND', (0, 0), (-1, 0), colors.grey), # Grey background for header | ||
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), # White text for header | ||
('ALIGN', (0, 0), (-1, -1), 'CENTER'), # Center align all cells | ||
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), # Bold font for header | ||
('BOTTOMPADDING', (0, 0), (-1, 0), 12), # Padding for header | ||
('GRID', (0, 0), (-1, -1), 1, colors.black), # Black grid lines | ||
]) | ||
tbl.setStyle(tbl_style) | ||
|
||
return tbl | ||
|
||
def convert_to_pdf(input_file, output_file): | ||
""" | ||
Converts the input markdown-like file to a PDF. | ||
:param input_file: Path to the input file containing markdown-like text. | ||
:type input_file: str | ||
:param output_file: Path to save the generated PDF. | ||
:type output_file: str | ||
""" | ||
with open(input_file, 'r', encoding='utf-8') as f: | ||
input_text = f.read() | ||
|
||
flowables = [] # List to store elements for the PDF | ||
|
||
# Add a title page with the current date | ||
date_str = datetime.now().strftime('%Y-%m-%d') | ||
title = Paragraph("Cloud Operations Report", ParagraphStyle('Title', fontSize=36, alignment=TA_CENTER)) | ||
date = Paragraph(date_str, ParagraphStyle('Date', fontSize=24, alignment=TA_CENTER)) | ||
flowables.extend([Spacer(1, 100), title, Spacer(1, 50), date, PageBreak()]) | ||
|
||
# Parse the markdown content and add to flowables | ||
flowables.extend(parse_markdown(input_text)) | ||
|
||
# Create and build the PDF document | ||
doc = SimpleDocTemplate( | ||
output_file, | ||
pagesize=LETTER, | ||
rightMargin=72, | ||
leftMargin=72, | ||
topMargin=72, | ||
bottomMargin=72, | ||
) | ||
doc.build(flowables) | ||
|
||
def main(): | ||
parser = argparse.ArgumentParser(description="Convert an input file to a PDF file.") | ||
parser.add_argument("--input", "-i", required=True, help="Path to the input file.") | ||
parser.add_argument("--output", "-o", help="Path to the output file (optional).") | ||
args = parser.parse_args() | ||
input_filename = args.input | ||
output_filename = args.output | ||
if not output_filename: | ||
# if the output_filename has not been provided | ||
base_name, ext = os.path.splitext(input_filename) | ||
# replace the extension from the input_filename when it exists | ||
# otherwise, just add .pdf | ||
output_filename = f"{base_name}.pdf" if ext else f"{input_filename}.pdf" | ||
convert_to_pdf(input_filename, output_filename) | ||
|
||
if __name__ == '__main__': | ||
main() |