Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create directory for unrelated tools #47

Merged
merged 1 commit into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions README.md

This file was deleted.

22 changes: 22 additions & 0 deletions misc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# A collection of unrelated utils and tools

## txt2pdf.py [here](txt2pdf.py)

### Description
* Primary purpose: generate PDF documents for the periodic CLOUD reports.
* Converts ascii text into a pdf document.
* If the input file contains basic markdown tags, they are recognised and processed accordingly.
  * Tables
  * Titles
  * Bold and italic text
* Each title starts on a new page.
* A title page is created, with the current date.
* Execute "txt2pdf.py -h" for help on the input options.

### Installation and Setup
* **dependency**: reportlab (install with `pip install reportlab`)

### TO-DO and Wishlist
* Improve quality of the code, make it more modular
* Improve the -h/--help message to explain better the different combinations of input options
* Add CLOUD-specific code, to enhance the content of the generated document. For example: some **warning** or **critical** alert when some numbers are not good.
205 changes: 205 additions & 0 deletions misc/txt2pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/usr/bin/env python3

import argparse
import os
import re
from datetime import datetime

from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER # For center alignment of text
from reportlab.lib.pagesizes import LETTER
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.platypus import (
SimpleDocTemplate,
Paragraph,
Spacer,
Table,
TableStyle,
PageBreak,
)

def parse_markdown(input_text):
    """
    Parse markdown-like text into a list of ReportLab flowables.

    Recognised constructs, checked in this order for every non-blank line:

    * headings: 1 to 6 ``#`` characters followed by whitespace. A level-1
      heading is preceded by a page break unless it is the first flowable.
    * tables: a run of consecutive lines containing ``|``, handed to
      ``parse_table``. If ``parse_table`` rejects the run (returns ``None``),
      the lines are rendered as ordinary paragraphs instead of being dropped.
    * anything else: one paragraph per line.

    Blank lines are skipped and every paragraph/table is followed by a
    12-point spacer. Heading and paragraph text goes through
    ``process_inline`` before being wrapped in a ``Paragraph``.

    :param input_text: The input text containing markdown-like syntax.
    :type input_text: str
    :return: Flowables (paragraphs, tables, spacers, page breaks) in document order.
    :rtype: list
    """
    flowables = []
    styles = getSampleStyleSheet()

    # One style per heading level; font size and leading shrink by 2pt per level.
    title_styles = {
        f'Heading{level}': ParagraphStyle(
            f'Heading{level}',
            parent=styles['Heading1' if level == 1 else 'Normal'],
            fontSize=24 - (level - 1) * 2,
            leading=28 - (level - 1) * 2,
            spaceAfter=12,
        )
        for level in range(1, 7)
    }
    normal_style = styles['Normal']

    def add_paragraph(text, style):
        # Every text element is a styled paragraph followed by a fixed gap.
        flowables.append(Paragraph(process_inline(text), style))
        flowables.append(Spacer(1, 12))

    lines = input_text.split('\n')
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        if not line:
            i += 1
            continue

        # Headings: "# Title" .. "###### Title"
        title_match = re.match(r'^(#{1,6})\s+(.*)', line)
        if title_match:
            level = len(title_match.group(1))
            if level == 1 and flowables:
                # Top-level sections start on a fresh page (except the very first).
                flowables.append(PageBreak())
            add_paragraph(title_match.group(2), title_styles[f'Heading{level}'])
            i += 1
            continue

        # Tables: gather the whole run of lines containing '|'
        if '|' in line:
            table_lines = []
            while i < len(lines) and '|' in lines[i]:
                table_lines.append(lines[i])
                i += 1
            table = parse_table(table_lines)
            if table is not None:
                flowables.append(table)
                flowables.append(Spacer(1, 12))
            else:
                # Not a usable table (e.g. a lone sentence containing a pipe):
                # keep the text rather than silently losing it.
                for raw_line in table_lines:
                    add_paragraph(raw_line.strip(), normal_style)
            continue

        # Regular paragraph
        add_paragraph(line, normal_style)
        i += 1

    return flowables

def process_inline(text):
    """
    Convert inline markdown emphasis into ``<b>``/``<i>`` markup.

    Supported forms are ``***bold italic***``, ``**bold**`` / ``__bold__``
    and ``*italic*`` / ``_italic_``. A delimiter only counts when the
    emphasised text is non-empty and does not start or end with whitespace,
    so ``2 * 3 * 4`` and list bullets are left alone. Underscore delimiters
    are additionally ignored inside words, so identifiers such as
    ``snake_case_name`` pass through unchanged.

    NOTE(review): characters such as ``<`` and ``&`` in the input are passed
    through unescaped; confirm whether callers rely on embedding raw markup
    before adding escaping here.

    :param text: The input text with inline markdown syntax.
    :type text: str
    :return: Text with bold/italic markdown replaced by ``<b>``/``<i>`` tags.
    :rtype: str
    """
    # Order matters: the longest delimiters are consumed first so that the
    # resulting tags always nest correctly.
    rules = (
        # ***text*** -> bold + italic (handled explicitly to avoid <b><i>..</b></i>)
        (r'\*\*\*(?=\S)(.+?)(?<=\S)\*\*\*', r'<b><i>\1</i></b>'),
        # **text**
        (r'\*\*(?=\S)(.+?)(?<=\S)\*\*', r'<b>\1</b>'),
        # __text__ (not inside a word)
        (r'(?<!\w)__(?=\S)(.+?)(?<=\S)__(?!\w)', r'<b>\1</b>'),
        # *text* (single asterisks only, not part of a longer run)
        (r'(?<!\*)\*(?![\s*])(.+?)(?<![\s*])\*(?!\*)', r'<i>\1</i>'),
        # _text_ (not inside a word)
        (r'(?<!\w)_(?![\s_])(.+?)(?<![\s_])_(?!\w)', r'<i>\1</i>'),
    )
    for pattern, markup in rules:
        text = re.sub(pattern, markup, text)
    return text

def parse_table(table_lines):
    """
    Parse markdown table lines into a styled ReportLab Table.

    The first line supplies the header cells. If the second line is a
    markdown separator row (cells made of dashes with optional colons, e.g.
    ``|---|:-:|``) it is skipped; otherwise it is treated as data. Leading
    and trailing pipes are optional on every row. Rows whose cell count
    differs from the header's are left out of the table.

    :param table_lines: List of strings representing the table in markdown format.
    :type table_lines: list
    :return: A styled Table, or ``None`` when fewer than two lines are given
        or the header line contains no cells.
    :rtype: Table or None
    """
    if len(table_lines) < 2:  # need at least a header plus one more line
        return None

    def split_row(line):
        # Outer pipes are optional: remove at most one from each end, then
        # split on the remaining pipes.
        row = line.strip()
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        if not row:
            return []
        return [cell.strip() for cell in row.split('|')]

    def is_separator(cells):
        # A separator row consists only of cells like '---', ':--', '--:' or ':-:'.
        return bool(cells) and all(re.fullmatch(r':?-+:?', cell) for cell in cells)

    headers = split_row(table_lines[0])
    if not headers:
        return None

    rows = [split_row(line) for line in table_lines[1:]]
    if rows and is_separator(rows[0]):
        rows = rows[1:]

    # Keep only rows that line up with the header columns.
    data = [headers] + [cells for cells in rows if len(cells) == len(headers)]

    # Create the table with centered alignment
    tbl = Table(data, hAlign='CENTER')
    tbl.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),  # header row background
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),  # header row text
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),  # all cells centered
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),  # header row font
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),  # extra padding under header
        ('GRID', (0, 0), (-1, -1), 1, colors.black),  # grid around every cell
    ]))
    return tbl

def convert_to_pdf(input_file, output_file, title="Cloud Operations Report"):
    """
    Convert a markdown-like text file to a PDF with a dated title page.

    The input is read as UTF-8. The PDF starts with a title page showing
    ``title`` and the current date (``YYYY-MM-DD``), followed by a page break
    and the flowables produced by ``parse_markdown``. The page size is LETTER
    with 72-point margins on all sides.

    :param input_file: Path to the input file containing markdown-like text.
    :type input_file: str
    :param output_file: Path to save the generated PDF.
    :type output_file: str
    :param title: Text shown on the title page; it is handed to ``Paragraph``
        as-is. Defaults to the original hard-coded report title.
    :type title: str
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        input_text = f.read()

    # Leading is set explicitly: without it the styles would use
    # ParagraphStyle's default leading, which is smaller than these font
    # sizes and would make a wrapped title overlap itself.
    title_style = ParagraphStyle('Title', fontSize=36, leading=44, alignment=TA_CENTER)
    date_style = ParagraphStyle('Date', fontSize=24, leading=30, alignment=TA_CENTER)
    date_str = datetime.now().strftime('%Y-%m-%d')

    # Title page, then the document body.
    flowables = [
        Spacer(1, 100),
        Paragraph(title, title_style),
        Spacer(1, 50),
        Paragraph(date_str, date_style),
        PageBreak(),
    ]
    flowables.extend(parse_markdown(input_text))

    # Create and build the PDF document
    doc = SimpleDocTemplate(
        output_file,
        pagesize=LETTER,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=72,
    )
    doc.build(flowables)

def main(argv=None):
    """
    Command-line entry point: convert an input file to a PDF file.

    When ``--output`` is not given, the output path is the input path with
    its extension replaced by ``.pdf`` (or with ``.pdf`` appended when the
    input has no extension). The run is aborted with a usage error if the
    output path resolves to the same file as the input, because the
    conversion would overwrite its own source.

    :param argv: Argument list to parse; ``None`` makes argparse read
        ``sys.argv[1:]``.
    :type argv: list or None
    """
    parser = argparse.ArgumentParser(description="Convert an input file to a PDF file.")
    parser.add_argument("--input", "-i", required=True, help="Path to the input file.")
    parser.add_argument("--output", "-o", help="Path to the output file (optional).")
    args = parser.parse_args(argv)

    input_filename = args.input
    output_filename = args.output
    if not output_filename:
        # splitext() returns the whole name as the root when there is no
        # extension, so this both replaces and appends as needed.
        output_filename = os.path.splitext(input_filename)[0] + ".pdf"

    # e.g. "-i report.pdf" with no "-o" would otherwise clobber the input.
    if os.path.realpath(output_filename) == os.path.realpath(input_filename):
        parser.error(
            f"output file '{output_filename}' would overwrite the input file; "
            "choose a different path with --output"
        )

    convert_to_pdf(input_filename, output_filename)


if __name__ == '__main__':
    main()