Merge pull request #47 from stfc/create_misc_directory

Create directory for unrelated tools
stfc · Jan 16, 2025 · 67fbebd · 67fbebd
2 parents 532b137 + 8ce94a8
commit 67fbebd
Show file tree

Hide file tree

Showing 3 changed files with 227 additions and 12 deletions.
diff --git a/README.md b/README.md
diff --git a/misc/README.md b/misc/README.md
@@ -0,0 +1,22 @@
+# A collection of unrelated utils and tools 
+
+## txt2pdf.py [here](txt2pdf.py)
+
+### Description
+* Primary purpose: generate pdf docs for the period CLOUD reports. 
+* Converts ascii text into a pdf document. 
+* If the input file contains basic markdown tags, they are recognised and processed accordingly. 
+    * Tables
+    * Titles
+    * bold face 
+* Each title starts on a new page.
+* A title page is created, with the current date. 
+* Execute "txt2pdf.py -h" for help on the input options. 
+
+### Installation and Setup
+* **dependency**:reportlab
+
+### TO-DO and Whishlist
+* Improve quality of the code, make it more modular
+* Improve the -h/--help message to explain better the different combinations of input options
+* Add CLOUD-specific code, to enhance the content of the generated document. For example: some **warning** or **critical** alert when some numbers are not good.
diff --git a/misc/txt2pdf.py b/misc/txt2pdf.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import re
+from datetime import datetime
+
+from reportlab.lib import colors
+from reportlab.lib.enums import TA_CENTER  # For center alignment of text
+from reportlab.lib.pagesizes import LETTER
+from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
+from reportlab.platypus import (
+    SimpleDocTemplate,
+    Paragraph,
+    Spacer,
+    Table,
+    TableStyle,
+    PageBreak,
+)
+
+def parse_markdown(input_text):
+    """
+    Parses the input markdown text and converts it into a list of ReportLab flowables.
+
+    :param input_text: The input text containing markdown-like syntax.
+    :type input_text: str
+    :return: A list of ReportLab flowables (text, tables, etc.) to be included in the PDF.
+    :rtype: list
+    """
+    flowables = []  # List to store elements to be added to the PDF
+    styles = getSampleStyleSheet()  # Get default styles provided by ReportLab
+
+    # Define custom styles for different heading levels
+    title_styles = {
+        f'Heading{level}': ParagraphStyle(
+            f'Heading{level}',
+            parent=styles['Heading1' if level == 1 else 'Normal'],
+            fontSize=24 - (level - 1) * 2,  # Font size decreases with level
+            leading=28 - (level - 1) * 2,  # Line height decreases with level
+            spaceAfter=12,  # Space after the heading
+        )
+        for level in range(1, 7)  # Define styles for headings 1 through 6
+    }
+
+    normal_style = styles['Normal']  # Default style for regular paragraphs
+
+    lines = input_text.split('\n')
+    i = 0
+    while i < len(lines):
+        line = lines[i].strip()
+        if not line:
+            i += 1
+            continue
+
+        # Handle headings defined by # symbols
+        title_match = re.match(r'^(#{1,6})\s+(.*)', line)  # Match lines like "# Heading"
+        if title_match:
+            level = len(title_match.group(1))  # Number of # symbols defines the level
+            text = title_match.group(2)  # Extract the heading text
+            style = title_styles[f'Heading{level}']  # Select the appropriate style
+            if level == 1 and flowables:  # Insert a page break for new sections
+                flowables.append(PageBreak())
+            paragraph = Paragraph(process_inline(text), style)  # Create a styled paragraph
+            flowables.append(paragraph)
+            flowables.append(Spacer(1, 12))  # Add space after the heading
+            i += 1
+            continue
+
+        # Handle tables (lines containing '|')
+        if '|' in line:
+            table_lines = []
+            while i < len(lines) and '|' in lines[i]:  # Continue until the table ends
+                table_lines.append(lines[i])
+                i += 1
+            table = parse_table(table_lines)  # Parse the table lines
+            if table:
+                flowables.append(table)  # Add the table to the flowables
+                flowables.append(Spacer(1, 12))  # Add space after the table
+            continue
+
+        # Handle regular paragraphs
+        paragraph = Paragraph(process_inline(line), normal_style)
+        flowables.append(paragraph)
+        flowables.append(Spacer(1, 12))  # Add space after the paragraph
+        i += 1
+
+    return flowables
+
+def process_inline(text):
+    """
+    Processes inline markdown syntax like bold (**text**) and italic (*text*).
+
+    :param text: The input text with inline markdown syntax.
+    :type text: str
+    :return: Text with ReportLab-compatible HTML tags for styling.
+    :rtype: str
+    """
+    bold_pattern = r'(\*\*|__)(.*?)\1'  # Pattern for bold (** or __)
+    italic_pattern = r'(\*|_)(.*?)\1'  # Pattern for italic (* or _)
+
+    # Replace bold markdown with <b> tags
+    def bold_repl(match):
+        return f'<b>{match.group(2)}</b>'
+
+    # Replace italic markdown with <i> tags
+    def italic_repl(match):
+        return f'<i>{match.group(2)}</i>'
+
+    text = re.sub(bold_pattern, bold_repl, text)  # Apply bold replacements
+    text = re.sub(italic_pattern, italic_repl, text)  # Apply italic replacements
+
+    return text
+
+def parse_table(table_lines):
+    """
+    Parses markdown table lines into a ReportLab Table object.
+
+    :param table_lines: List of strings representing the table in markdown format.
+    :type table_lines: list
+    :return: A styled ReportLab Table object.
+    :rtype: Table
+    """
+    if len(table_lines) < 2:  # A valid table requires at least a header and one row
+        return None
+
+    # Extract headers from the first line (ignoring leading and trailing |)
+    headers = [cell.strip() for cell in re.split(r'\|', table_lines[0])[1:-1]]
+    data = [headers]  # Initialize table data with headers
+
+    # Extract data rows (skipping the separator line)
+    for line in table_lines[2:]:
+        cells = [cell.strip() for cell in re.split(r'\|', line)[1:-1]]
+        if len(cells) == len(headers):  # Ensure row length matches header length
+            data.append(cells)
+
+    if not data:
+        return None
+
+    # Create the table with centered alignment
+    tbl = Table(data, hAlign='CENTER')
+
+    # Apply table styles
+    tbl_style = TableStyle([
+        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),  # Grey background for header
+        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),  # White text for header
+        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),  # Center align all cells
+        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),  # Bold font for header
+        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),  # Padding for header
+        ('GRID', (0, 0), (-1, -1), 1, colors.black),  # Black grid lines
+    ])
+    tbl.setStyle(tbl_style)
+
+    return tbl
+
+def convert_to_pdf(input_file, output_file):
+    """
+    Converts the input markdown-like file to a PDF.
+
+    :param input_file: Path to the input file containing markdown-like text.
+    :type input_file: str
+    :param output_file: Path to save the generated PDF.
+    :type output_file: str
+    """
+    with open(input_file, 'r', encoding='utf-8') as f:
+        input_text = f.read()
+
+    flowables = []  # List to store elements for the PDF
+
+    # Add a title page with the current date
+    date_str = datetime.now().strftime('%Y-%m-%d')
+    title = Paragraph("Cloud Operations Report", ParagraphStyle('Title', fontSize=36, alignment=TA_CENTER))
+    date = Paragraph(date_str, ParagraphStyle('Date', fontSize=24, alignment=TA_CENTER))
+    flowables.extend([Spacer(1, 100), title, Spacer(1, 50), date, PageBreak()])
+
+    # Parse the markdown content and add to flowables
+    flowables.extend(parse_markdown(input_text))
+
+    # Create and build the PDF document
+    doc = SimpleDocTemplate(
+        output_file,
+        pagesize=LETTER,
+        rightMargin=72,
+        leftMargin=72,
+        topMargin=72,
+        bottomMargin=72,
+    )
+    doc.build(flowables)
+
+def main():
+    parser = argparse.ArgumentParser(description="Convert an input file to a PDF file.")
+    parser.add_argument("--input", "-i", required=True, help="Path to the input file.")
+    parser.add_argument("--output", "-o", help="Path to the output file (optional).")
+    args = parser.parse_args()
+    input_filename = args.input
+    output_filename = args.output
+    if not output_filename:
+        # if the output_filename has not been provided
+        base_name, ext = os.path.splitext(input_filename)
+        # replace the extension from the input_filename when it exists
+        # otherwise, just add .pdf 
+        output_filename = f"{base_name}.pdf" if ext else f"{input_filename}.pdf"
+    convert_to_pdf(input_filename, output_filename)
+
+if __name__ == '__main__':
+    main()