Merge pull request #1 from DjangoPeng/v0.1

feat:add temp_manager, ppt_generator and input_parser
DjangoPeng · Oct 12, 2024 · b9d383c · b9d383c
2 parents 6d6bc0d + aec58c5
commit b9d383c
Show file tree

Hide file tree

Showing 10 changed files with 325 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -160,3 +160,8 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+
+# ChatPPT custom config
+outputs/*
+.DS_Store
diff --git a/docs/ppt_input_format.md b/docs/ppt_input_format.md
@@ -0,0 +1,105 @@
+### **ChatPPT 输入文本格式说明**
+
+此文档介绍如何编写输入文本以生成 PowerPoint 幻灯片。每个幻灯片包含标题、要点、图片等内容。输入文本将被解析并自动生成对应的 PowerPoint 文件。
+
+
+ChatPPT 输入文本格式如下所示：
+
+```plaintext
+# [主标题]
+
+## [幻灯片标题] [布局名称]
+- [要点内容1]
+- [要点内容2]
+
+## [幻灯片标题] [布局名称]
+- [要点内容1]
+![图片描述](图片路径)
+```
+
+#### **1. 主标题**
+   - 格式：`# [主标题]`
+   - 说明：主标题作为整个 PowerPoint 的标题，同时也将作为生成的 PowerPoint 文件名。
+   - 示例：
+     ```
+     # 企业年度报告
+     ```
+
+#### **2. 幻灯片标题和布局**
+   - 格式：`## [幻灯片标题] [布局名称]`
+   - 说明：每张幻灯片以 `##` 开头，后面跟随标题和布局名称。标题直接书写，布局名称必须用方括号 `[]` 括起来，并且需要在模板文件中存在。
+   - 示例：
+     ```
+     ## 2024 业绩概述 [Title and Content]
+     ```
+
+#### **3. 幻灯片内容 - 要点列表**
+   - 格式：`- [要点内容]`
+   - 说明：每个要点以 `-` 开头，后跟要点的内容。该格式用于生成幻灯片中的项目符号列表。
+   - 示例：
+     ```
+     - 总收入增长15%
+     - 市场份额扩大至30%
+     ```
+
+#### **4. 幻灯片内容 - 图片**
+   - 格式：`![图片描述](图片路径)`
+   - 说明：使用 `![图片描述](图片路径)` 的格式插入图片。图片路径可以是相对路径或绝对路径；相对路径以运行程序时的当前工作目录为基准。请确保文件存在于指定路径下，否则该图片不会被插入。
+   - 示例：
+     ```
+     ![业绩图表](images/performance_chart.png)
+     ```
+
+### **完整输入文本示例**
+
+以下是一个完整的输入文本示例，包含主标题、多个幻灯片、要点列表以及图片插入：
+
+```plaintext
+# 企业年度报告
+
+## 2024 业绩概述 [Title and Content]
+- 总收入增长15%
+- 市场份额扩大至30%
+
+## 新产品发布 [Title and Content]
+- 产品A: 特色功能介绍
+- 产品B: 市场定位
+
+## 业绩图表 [Title and 2 Column]
+![业绩图表](images/performance_chart.png)
+```
+
+### **各部分说明**
+
+1. **主标题**：
+   - `# 企业年度报告`：该文本将用作生成的 PowerPoint 文件名，即 "企业年度报告.pptx"。
+
+2. **幻灯片 1：2024 业绩概述**：
+   - 标题：`2024 业绩概述`，布局为 `Title and Content`。
+   - 内容：包括两条要点，分别是 "总收入增长15%" 和 "市场份额扩大至30%"。
+
+3. **幻灯片 2：新产品发布**：
+   - 标题：`新产品发布`，布局为 `Title and Content`。
+   - 内容：包括两条要点，分别是 "产品A: 特色功能介绍" 和 "产品B: 市场定位"。
+
+4. **幻灯片 3：业绩图表**：
+   - 标题：`业绩图表`，布局为 `Title and 2 Column`。
+   - 图片：插入路径为 `images/performance_chart.png` 的图片。
+
+### **可用布局**
+
+布局名称必须与 PowerPoint 模板中的布局名称相匹配。请确保在模板中存在的布局名称可以正确映射到输入文本中。例如：
+- `Title and Content`
+- `Title and 2 Column`
+- `Title Only`
+
+可以通过运行程序中的 `print_layouts` 函数来打印模板中可用的布局名称，确保输入文本中的布局名称与模板中的名称一致。
+
+
+### **注意事项**
+
+1. **布局名称**：请确保输入文本中的布局名称与模板中的布局名称保持一致。例如，`[Title and Content]` 必须与模板中的布局名完全匹配。
+
+2. **图片路径**：图片路径应为本地文件系统的相对路径或绝对路径。确保图片文件存在于指定位置。
+
+3. **占位符**：确保模板中的布局包含文本和图片占位符，以便自动插入要点和图片。
diff --git a/images/forecast.png b/images/forecast.png
diff --git a/images/performance_chart.png b/images/performance_chart.png
diff --git a/src/input_parser.py b/src/input_parser.py
@@ -0,0 +1,111 @@
+import re
+from dataclasses import dataclass, field
+from typing import List, Optional, Tuple
+
+@dataclass
+class SlideContent:
+    """Content of a single slide: its title, bullet points and an optional image."""
+    title: str  # slide title text
+    bullet_points: List[str] = field(default_factory=list)  # bullet texts, in input order
+    image_path: Optional[str] = None  # path taken from a `![alt](path)` line; None when the slide has no image
+
+@dataclass
+class Slide:
+    """A slide to generate: a layout index paired with the slide's content."""
+    layout: int  # index of the slide layout within the template's `slide_layouts`
+    content: SlideContent  # title, bullet points and optional image of the slide
+
+@dataclass
+class PowerPoint:
+    """Parsed presentation: the main title plus its slides in input order."""
+    title: str  # text of the `# ` heading; empty string when the input has none
+    slides: List[Slide] = field(default_factory=list)  # slides in the order they appear in the input
+
+
+def convert_user_input_to_standard_format(
+    user_input: str,
+    default_layout: str = "Title and Content 3",
+) -> str:
+    """
+    Convert raw outline text into the standard input format used by parse_input_text.
+
+    The raw text is split on lines of the form ``- **Slide N**: ...``. For each
+    resulting section:
+
+    * ``**Title**: <text>`` becomes ``## <text> [<default_layout>]``;
+    * numbered items (``1.``, ``2.``, ... with any number of digits) that follow
+      ``**Key Points**:`` and precede ``**Notes**:`` become ``- <text>`` bullets;
+    * lines after ``**Notes**:`` that start with ``image:`` or ``chart:``
+      (case-insensitive, leading whitespace ignored) become ``![x](x)`` image lines.
+
+    Args:
+        user_input: Raw outline text supplied by the user.
+        default_layout: Layout name written in brackets after every slide title.
+
+    Returns:
+        The converted text, one entry per line, joined with newlines.
+    """
+    slide_pattern = re.compile(r'^- \*\*Slide \d+\*\*:.*$', re.MULTILINE)
+    title_pattern = re.compile(r'\*\*Title\*\*: (.*)')
+    points_pattern = re.compile(r'\*\*Key Points\*\*:.*$', re.MULTILINE)
+    notes_pattern = re.compile(r'\*\*Notes\*\*:.*$', re.MULTILINE)
+    # Any "<digits>." prefix marks a key point; the rest of the line is its text.
+    numbered_item_pattern = re.compile(r'^\d+\.\s*(.*)$')
+
+    converted_text = []
+
+    for slide in slide_pattern.split(user_input):
+        # Slide title
+        title_match = title_pattern.search(slide)
+        if title_match:
+            title = title_match.group(1).strip()
+            converted_text.append(f"## {title} [{default_layout}]")
+
+        # Key points: stop at the Notes marker so numbered note lines are not
+        # mistaken for bullets.
+        points_section = points_pattern.split(slide)
+        if len(points_section) > 1:
+            points_text = notes_pattern.split(points_section[1])[0]
+            for line in points_text.splitlines():
+                item_match = numbered_item_pattern.match(line.strip())
+                if not item_match:
+                    continue
+                point_cleaned = item_match.group(1).strip()
+                if point_cleaned:
+                    converted_text.append(f"- {point_cleaned}")
+
+        # Image / chart suggestions listed under Notes
+        notes_section = notes_pattern.split(slide)
+        if len(notes_section) > 1:
+            for line in notes_section[1].splitlines():
+                note = line.strip()
+                if note.lower().startswith(('image:', 'chart:')):
+                    note_cleaned = note.split(':', 1)[1].strip()
+                    converted_text.append(f"![{note_cleaned}]({note_cleaned})")
+
+    return '\n'.join(converted_text)
+
+
+def parse_input_text(
+    input_text: str,
+    layout_mapping: dict,
+    default_layout_index: int = 1,
+) -> Tuple[PowerPoint, str]:
+    """
+    Parse standard-format input text into a PowerPoint data structure.
+
+    Recognised lines (each line is stripped of surrounding whitespace first):
+
+    * ``# <title>``: presentation title (the last such line wins);
+    * ``## <title> [<layout name>]``: starts a new slide; the layout name is
+      looked up in ``layout_mapping`` and falls back to ``default_layout_index``
+      when it is unknown;
+    * ``## <title>`` without a bracketed layout: also starts a new slide, using
+      ``default_layout_index``, so that its bullets and images are not attached
+      to the previous slide;
+    * ``- <text>``: bullet point of the current slide;
+    * ``![alt](path)``: image of the current slide (the last one wins).
+
+    Bullet and image lines that appear before the first slide heading are ignored.
+
+    Args:
+        input_text: Text in the standard input format.
+        layout_mapping: Maps layout names to layout indices.
+        default_layout_index: Layout index used when a layout is missing or unknown.
+
+    Returns:
+        A ``(PowerPoint, presentation_title)`` tuple; the title is an empty
+        string when the input has no ``# `` line.
+    """
+    slide_title_pattern = re.compile(r'^##\s+(.*?)\s+\[(.*?)\]')
+    bullet_pattern = re.compile(r'^-\s+(.*)')
+    image_pattern = re.compile(r'!\[.*?\]\((.*?)\)')
+
+    presentation_title = ""
+    slides: List[Slide] = []
+    current_slide: Optional[Slide] = None
+
+    for raw_line in input_text.split('\n'):
+        line = raw_line.strip()
+        if line.startswith('# '):
+            presentation_title = line[2:].strip()
+        elif line.startswith('## '):
+            match = slide_title_pattern.match(line)
+            if match:
+                title, layout_name = match.groups()
+                layout_index = layout_mapping.get(layout_name.strip(), default_layout_index)
+            else:
+                # Heading without "[layout]": still a slide boundary.
+                title = line[3:]
+                layout_index = default_layout_index
+            if current_slide is not None:
+                slides.append(current_slide)
+            current_slide = Slide(layout=layout_index, content=SlideContent(title=title.strip()))
+        elif line.startswith('- ') and current_slide is not None:
+            match = bullet_pattern.match(line)
+            if match:
+                current_slide.content.bullet_points.append(match.group(1).strip())
+        elif line.startswith('![') and current_slide is not None:
+            match = image_pattern.match(line)
+            if match:
+                current_slide.content.image_path = match.group(1).strip()
+
+    # Flush the slide that was still being built when the input ended.
+    if current_slide is not None:
+        slides.append(current_slide)
+
+    return PowerPoint(title=presentation_title, slides=slides), presentation_title
diff --git a/src/main.py b/src/main.py
@@ -0,0 +1,39 @@
+import os
+from input_parser import parse_input_text
+from ppt_generator import generate_presentation
+from template_manager import load_template, get_layout_mapping, print_layouts
+
+def main():
+    """Generate the demo presentation from the built-in sample input.
+
+    Loads the master template, prints its available layouts, parses the sample
+    text and writes the result to ``outputs/<presentation title>.pptx``. The
+    template and image paths are relative, so they resolve against the current
+    working directory.
+    """
+    input_text = """
+    # ChatPPT_Demo
+
+    ## ChatPPT Demo [Title Only]
+
+    ## 2024 业绩概述 [Title and Content]
+    - 总收入增长15%
+    - 市场份额扩大至30%
+
+    ## 业绩图表 [Title and Picture 1]
+    ![业绩图表](images/performance_chart.png)
+
+    ## 新产品发布 [Title and 2 Column]
+    - 产品A: 特色功能介绍
+    - 产品B: 市场定位
+    ![未来增长](images/forecast.png)
+    """
+
+    template_file = 'templates/MasterTemplate.pptx'
+    prs = load_template(template_file)
+
+    print("Available Slide Layouts:")
+    print_layouts(prs)
+
+    layout_mapping = get_layout_mapping(prs)
+
+    powerpoint_data, presentation_title = parse_input_text(input_text, layout_mapping)
+
+    # The outputs directory is git-ignored, so it does not exist on a fresh
+    # checkout; create it before saving.
+    output_dir = "outputs"
+    os.makedirs(output_dir, exist_ok=True)
+    output_pptx = f"{output_dir}/{presentation_title}.pptx"
+    generate_presentation(powerpoint_data, template_file, output_pptx)
+
+if __name__ == "__main__":
+    main()
diff --git a/src/ppt_generator.py b/src/ppt_generator.py
@@ -0,0 +1,42 @@
+import os
+from pptx import Presentation
+from utils import remove_all_slides
+
+def generate_presentation(powerpoint_data, template_path: str, output_path: str):
+    """Render parsed presentation data into a .pptx file based on a template.
+
+    Args:
+        powerpoint_data: Object with a ``title`` and a ``slides`` sequence; each
+            slide has a ``layout`` index and a ``content`` carrying ``title``,
+            ``bullet_points`` and ``image_path``.
+        template_path: Path of the template .pptx file.
+        output_path: Path the generated presentation is saved to. Its parent
+            directory is created when missing.
+
+    Raises:
+        FileNotFoundError: If ``template_path`` does not exist.
+    """
+    # Local import keeps the new name in scope without touching the file's import block.
+    from pptx.enum.shapes import PP_PLACEHOLDER
+
+    if not os.path.exists(template_path):
+        raise FileNotFoundError(f"Template file '{template_path}' does not exist.")
+
+    prs = Presentation(template_path)
+    remove_all_slides(prs)
+    prs.core_properties.title = powerpoint_data.title
+
+    for slide in powerpoint_data.slides:
+        # Fall back to the first layout when the index is out of range.
+        if slide.layout >= len(prs.slide_layouts):
+            slide_layout = prs.slide_layouts[0]
+        else:
+            slide_layout = prs.slide_layouts[slide.layout]
+
+        new_slide = prs.slides.add_slide(slide_layout)
+
+        title_shape = new_slide.shapes.title
+        if title_shape is not None:
+            title_shape.text = slide.content.title
+
+        # Bullets go into the first non-title shape that can hold text.
+        for shape in new_slide.shapes:
+            if shape.has_text_frame and shape != title_shape:
+                text_frame = shape.text_frame
+                text_frame.clear()
+                # clear() leaves exactly one empty paragraph: reuse it for the
+                # first bullet instead of adding a paragraph after it, which
+                # would produce a leading blank bullet.
+                for index, point in enumerate(slide.content.bullet_points):
+                    p = text_frame.paragraphs[0] if index == 0 else text_frame.add_paragraph()
+                    p.text = point
+                    p.level = 0
+                break
+
+        if slide.content.image_path:
+            # Relative image paths resolve against the current working directory.
+            image_full_path = os.path.join(os.getcwd(), slide.content.image_path)
+            if os.path.exists(image_full_path):
+                for shape in new_slide.placeholders:
+                    if shape.placeholder_format.type == PP_PLACEHOLDER.PICTURE:
+                        shape.insert_picture(image_full_path)
+                        break
+                else:
+                    print(f"Warning: no picture placeholder on slide '{slide.content.title}'; image skipped.")
+            else:
+                print(f"Warning: image '{image_full_path}' does not exist; image skipped.")
+
+    # Make sure the target directory exists before saving.
+    output_dir = os.path.dirname(output_path)
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+    prs.save(output_path)
+    print(f"Presentation saved to '{output_path}'")
diff --git a/src/template_manager.py b/src/template_manager.py
@@ -0,0 +1,15 @@
+from pptx import Presentation
+
+def load_template(template_path: str) -> Presentation:
+    """Open the PowerPoint template at `template_path` and return the presentation object."""
+    return Presentation(template_path)
+
+def get_layout_mapping(prs: Presentation) -> dict:
+    """Return a dict mapping each slide layout's name to its index in `prs.slide_layouts`.
+
+    When two layouts share a name, the later index is kept.
+    """
+    return {layout.name: position for position, layout in enumerate(prs.slide_layouts)}
+
+def print_layouts(prs: Presentation):
+    """Print one `Layout <index>: <name>` line for every slide layout in `prs`."""
+    layout_names = (layout.name for layout in prs.slide_layouts)
+    for number, name in enumerate(layout_names):
+        print(f"Layout {number}: {name}")
diff --git a/src/utils.py b/src/utils.py
@@ -0,0 +1,8 @@
+from pptx import Presentation
+
+def remove_all_slides(prs: Presentation):
+    """Remove every slide from `prs`, keeping its masters and layouts.
+
+    Each slide is detached in two steps: its relationship is dropped from the
+    presentation part and its entry is removed from the slide-id list. Removing
+    only the slide-id entry would leave the old slide parts in the package,
+    where their part names can collide with slides added afterwards.
+    """
+    slide_id_list = prs.slides._sldIdLst
+    # Iterate over a snapshot because entries are removed during the loop.
+    for slide_id in list(slide_id_list):
+        prs.part.drop_rel(slide_id.rId)
+        slide_id_list.remove(slide_id)
+    print("所有默认幻灯片已被移除。")
diff --git a/templates/MasterTemplate.pptx b/templates/MasterTemplate.pptx