Skip to content

Commit

Permalink
Merge pull request #7 from DjangoPeng/v0.4.1
Browse files Browse the repository at this point in the history
optimize prompts, upgrade LLM to GPT-4o-mini and add Chinese comments to other modules
  • Loading branch information
DjangoPeng authored Aug 21, 2024
2 parents 74de54f + af92120 commit 7f76b6e
Show file tree
Hide file tree
Showing 11 changed files with 151 additions and 168 deletions.
22 changes: 22 additions & 0 deletions prompts/report_prompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
你接下来收到的都是开源项目的最新进展。

你根据进展,总结成一个中文的报告,以项目名称和日期开头,包含:新增功能、主要改进、修复问题等章节。

参考示例如下:

# LangChain 项目进展

## 时间周期:2024-08-13至2024-08-18

## 新增功能
- langchain-box: 添加langchain box包和DocumentLoader
- 添加嵌入集成测试

## 主要改进
- 将@root_validator用法升级以与pydantic 2保持一致
- 将根验证器升级为与pydantic 2兼容

## 修复问题
- 修复Azure的json模式问题
- 修复Databricks Vector Search演示笔记本问题
- 修复Microsoft Azure Cosmos集成测试中的连接字符串问题
18 changes: 15 additions & 3 deletions src/command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,49 +2,61 @@

import argparse

import argparse # 导入argparse库,用于处理命令行参数解析

class CommandHandler:
def __init__(self, github_client, subscription_manager, report_generator):
# 初始化CommandHandler,接收GitHub客户端、订阅管理器和报告生成器
self.github_client = github_client
self.subscription_manager = subscription_manager
self.report_generator = report_generator
self.parser = self.create_parser()
self.parser = self.create_parser() # 创建命令行解析器

def create_parser(self):
# 创建并配置命令行解析器
parser = argparse.ArgumentParser(
description='GitHub Sentinel Command Line Interface',
formatter_class=argparse.RawTextHelpFormatter
)
subparsers = parser.add_subparsers(title='Commands', dest='command')

# 添加订阅命令
parser_add = subparsers.add_parser('add', help='Add a subscription')
parser_add.add_argument('repo', type=str, help='The repository to subscribe to (e.g., owner/repo)')
parser_add.set_defaults(func=self.add_subscription)

# 删除订阅命令
parser_remove = subparsers.add_parser('remove', help='Remove a subscription')
parser_remove.add_argument('repo', type=str, help='The repository to unsubscribe from (e.g., owner/repo)')
parser_remove.set_defaults(func=self.remove_subscription)

# 列出所有订阅命令
parser_list = subparsers.add_parser('list', help='List all subscriptions')
parser_list.set_defaults(func=self.list_subscriptions)

# 导出每日进展命令
parser_export = subparsers.add_parser('export', help='Export daily progress')
parser_export.add_argument('repo', type=str, help='The repository to export progress from (e.g., owner/repo)')
parser_export.set_defaults(func=self.export_daily_progress)

# 导出特定日期范围进展命令
parser_export_range = subparsers.add_parser('export-range', help='Export progress over a range of dates')
parser_export_range.add_argument('repo', type=str, help='The repository to export progress from (e.g., owner/repo)')
parser_export_range.add_argument('days', type=int, help='The number of days to export progress for')
parser_export_range.set_defaults(func=self.export_progress_by_date_range)

# 生成日报命令
parser_generate = subparsers.add_parser('generate', help='Generate daily report from markdown file')
parser_generate.add_argument('file', type=str, help='The markdown file to generate report from')
parser_generate.set_defaults(func=self.generate_daily_report)

# 帮助命令
parser_help = subparsers.add_parser('help', help='Show help message')
parser_help.set_defaults(func=self.print_help)

return parser
return parser # 返回配置好的解析器

# 下面是各种命令对应的方法实现,每个方法都使用了相应的管理器来执行实际操作,并输出结果信息
def add_subscription(self, args):
self.subscription_manager.add_subscription(args.repo)
print(f"Added subscription for repository: {args.repo}")
Expand Down Expand Up @@ -72,4 +84,4 @@ def generate_daily_report(self, args):
print(f"Generated daily report from file: {args.file}")

def print_help(self, args=None):
self.parser.print_help()
self.parser.print_help() # 输出帮助信息
52 changes: 26 additions & 26 deletions src/command_tool.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
import shlex
import shlex # 导入shlex库,用于正确解析命令行输入

from config import Config
from github_client import GitHubClient
from notifier import Notifier
from report_generator import ReportGenerator
from llm import LLM
from subscription_manager import SubscriptionManager
from command_handler import CommandHandler
from logger import LOG
from config import Config # 从config模块导入Config类,用于配置管理
from github_client import GitHubClient # 从github_client模块导入GitHubClient类,用于GitHub API操作
from notifier import Notifier # 从notifier模块导入Notifier类,用于通知功能
from report_generator import ReportGenerator # 从report_generator模块导入ReportGenerator类,用于报告生成
from llm import LLM # 从llm模块导入LLM类,可能用于语言模型相关操作
from subscription_manager import SubscriptionManager # 从subscription_manager模块导入SubscriptionManager类,管理订阅
from command_handler import CommandHandler # 从command_handler模块导入CommandHandler类,处理命令行命令
from logger import LOG # 从logger模块导入LOG对象,用于日志记录

def main():
config = Config()
github_client = GitHubClient(config.github_token)
notifier = Notifier(config.notification_settings)
llm = LLM()
report_generator = ReportGenerator(llm)
subscription_manager = SubscriptionManager(config.subscriptions_file)
command_handler = CommandHandler(github_client, subscription_manager, report_generator)
config = Config() # 创建配置实例
github_client = GitHubClient(config.github_token) # 创建GitHub客户端实例
notifier = Notifier(config.notification_settings) # 创建通知器实例
llm = LLM() # 创建语言模型实例
report_generator = ReportGenerator(llm) # 创建报告生成器实例
subscription_manager = SubscriptionManager(config.subscriptions_file) # 创建订阅管理器实例
command_handler = CommandHandler(github_client, subscription_manager, report_generator) # 创建命令处理器实例

parser = command_handler.parser
command_handler.print_help()
parser = command_handler.parser # 获取命令解析器
command_handler.print_help() # 打印帮助信息

while True:
try:
user_input = input("GitHub Sentinel> ")
if user_input in ['exit', 'quit']:
user_input = input("GitHub Sentinel> ") # 等待用户输入
if user_input in ['exit', 'quit']: # 如果输入为退出命令,则结束循环
break
try:
args = parser.parse_args(shlex.split(user_input))
if args.command is None:
args = parser.parse_args(shlex.split(user_input)) # 解析用户输入的命令
if args.command is None: # 如果没有命令被解析,则继续循环
continue
args.func(args)
except SystemExit as e:
args.func(args) # 执行对应的命令函数
except SystemExit as e: # 捕获由于错误命令引发的异常
LOG.error("Invalid command. Type 'help' to see the list of available commands.")
except Exception as e:
LOG.error(f"Unexpected error: {e}")
LOG.error(f"Unexpected error: {e}") # 记录其他未预期的错误

if __name__ == '__main__':
main()
main() # 如果直接运行该文件,则执行main函数
12 changes: 10 additions & 2 deletions src/config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import json
import os

class Config:
def __init__(self):
self.load_config()

def load_config(self):
# 从环境变量获取GitHub Token
self.github_token = os.getenv('GITHUB_TOKEN')

with open('config.json', 'r') as f:
config = json.load(f)
self.github_token = config.get('github_token')

# 如果环境变量中没有GitHub Token,则从配置文件中读取
if not self.github_token:
self.github_token = config.get('github_token')

self.notification_settings = config.get('notification_settings')
self.subscriptions_file = config.get('subscriptions_file')
self.update_interval = config.get('update_interval', 24 * 60 * 60) # Default to 24 hours
self.update_interval = config.get('update_interval', 24 * 60 * 60) # 默认24小时
54 changes: 28 additions & 26 deletions src/daemon_process.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,53 @@
import daemon
import threading
import time
import daemon # 导入daemon库,用于创建守护进程
import threading # 导入threading库,用于多线程处理
import time # 导入time库,用于控制时间间隔


from config import Config
from github_client import GitHubClient
from notifier import Notifier
from report_generator import ReportGenerator
from llm import LLM
from subscription_manager import SubscriptionManager
from scheduler import Scheduler
from logger import LOG
from config import Config # 导入配置管理类
from github_client import GitHubClient # 导入GitHub客户端类,处理GitHub API请求
from notifier import Notifier # 导入通知器类,用于发送通知
from report_generator import ReportGenerator # 导入报告生成器类
from llm import LLM # 导入语言模型类,可能用于生成报告内容
from subscription_manager import SubscriptionManager # 导入订阅管理器类,管理GitHub仓库订阅
from scheduler import Scheduler # 导入调度器类,用于定时执行任务
from logger import LOG # 导入日志记录器

def run_scheduler(scheduler):
# 启动调度器的函数,用于在线程中运行
scheduler.start()

def main():
config = Config()
github_client = GitHubClient(config.github_token)
notifier = Notifier(config.notification_settings)
llm = LLM()
report_generator = ReportGenerator(llm)
subscription_manager = SubscriptionManager(config.subscriptions_file)
config = Config() # 创建配置实例
github_client = GitHubClient(config.github_token) # 创建GitHub客户端实例
notifier = Notifier(config.notification_settings) # 创建通知器实例
llm = LLM() # 创建语言模型实例
report_generator = ReportGenerator(llm) # 创建报告生成器实例
subscription_manager = SubscriptionManager(config.subscriptions_file) # 创建订阅管理器实例

# 创建调度器实例,配置其参数
scheduler = Scheduler(
github_client=github_client,
notifier=notifier,
report_generator=report_generator,
subscription_manager=subscription_manager,
interval=config.update_interval
interval=config.update_interval # 设置更新间隔
)

# 创建并启动调度器运行的线程
scheduler_thread = threading.Thread(target=run_scheduler, args=(scheduler,))
scheduler_thread.daemon = True
scheduler_thread.start()
scheduler_thread.daemon = True # 设置线程为守护线程
scheduler_thread.start() # 启动线程

LOG.info("Scheduler thread started.")
LOG.info("Scheduler thread started.") # 记录调度器线程已启动

# Use python-daemon to properly daemonize the process
# 使用python-daemon库,以守护进程方式运行程序
with daemon.DaemonContext():
try:
while True:
time.sleep(config.update_interval)
time.sleep(config.update_interval) # 按配置的更新间隔休眠
except KeyboardInterrupt:
LOG.info("Daemon process stopped.")
LOG.info("Daemon process stopped.") # 在接收到中断信号时记录日志

if __name__ == '__main__':
main()

# nohup python3 src/daemon_process.py > logs/daemon_process.log 2>&1 &
# 启动方式:nohup python3 src/daemon_process.py > logs/daemon_process.log 2>&1 &
6 changes: 0 additions & 6 deletions src/github_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,6 @@ def export_daily_progress(self, repo):
file.write("\n## Issues Closed Today\n")
for issue in updates['issues']: # 写入今天关闭的问题
file.write(f"- {issue['title']} #{issue['number']}\n")
file.write("\n## Pull Requests Merged Today\n")
for pr in updates['pull_requests']: # 写入今天合并的拉取请求
file.write(f"- {pr['title']} #{pr['number']}\n")

LOG.info(f"Exported daily progress to {file_path}") # 记录日志
return file_path
Expand All @@ -91,9 +88,6 @@ def export_progress_by_date_range(self, repo, days):
file.write(f"\n## Issues Closed in the Last {days} Days\n")
for issue in updates['issues']: # 写入在指定日期内关闭的问题
file.write(f"- {issue['title']} #{issue['number']}\n")
file.write(f"\n## Pull Requests Merged in the Last {days} Days\n")
for pr in updates['pull_requests']: # 写入在指定日期内合并的拉取请求
file.write(f"- {pr['title']} #{pr['number']}\n")

LOG.info(f"Exported time-range progress to {file_path}") # 记录日志
return file_path
37 changes: 20 additions & 17 deletions src/gradio_server.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,41 @@
import gradio as gr
import gradio as gr # 导入gradio库用于创建GUI

from config import Config
from github_client import GitHubClient
from report_generator import ReportGenerator
from llm import LLM
from subscription_manager import SubscriptionManager
from logger import LOG
from config import Config # 导入配置管理模块
from github_client import GitHubClient # 导入用于GitHub API操作的客户端
from report_generator import ReportGenerator # 导入报告生成器模块
from llm import LLM # 导入可能用于处理语言模型的LLM类
from subscription_manager import SubscriptionManager # 导入订阅管理器
from logger import LOG # 导入日志记录器

# 创建各个组件的实例
config = Config()
github_client = GitHubClient(config.github_token)
llm = LLM()
report_generator = ReportGenerator(llm)
subscription_manager = SubscriptionManager(config.subscriptions_file)


def export_progress_by_date_range(repo, days):
raw_file_path = github_client.export_progress_by_date_range(repo, days)
report, report_file_path = report_generator.generate_report_by_date_range(raw_file_path, days)
# 定义一个函数,用于导出和生成指定时间范围内项目的进展报告
raw_file_path = github_client.export_progress_by_date_range(repo, days) # 导出原始数据文件路径
report, report_file_path = report_generator.generate_report_by_date_range(raw_file_path, days) # 生成并获取报告内容及文件路径

return report, report_file_path
return report, report_file_path # 返回报告内容和报告文件路径

# 创建Gradio界面
demo = gr.Interface(
fn=export_progress_by_date_range,
title="GitHubSentinel",
fn=export_progress_by_date_range, # 指定界面调用的函数
title="GitHubSentinel", # 设置界面标题
inputs=[
gr.Dropdown(
subscription_manager.list_subscriptions(), label="订阅列表", info="已订阅GitHub项目"
),
), # 下拉菜单选择订阅的GitHub项目
gr.Slider(value=2, minimum=1, maximum=7, step=1, label="报告周期", info="生成项目过去一段时间进展,单位:天"),

# 滑动条选择报告的时间范围
],
outputs=[gr.Markdown(), gr.File(label="下载报告")],
outputs=[gr.Markdown(), gr.File(label="下载报告")], # 输出格式:Markdown文本和文件下载
)

if __name__ == "__main__":
demo.launch(share=True, server_name="0.0.0.0")
demo.launch(share=True, server_name="0.0.0.0") # 启动界面并设置为公共可访问
# 可选带有用户认证的启动方式
# demo.launch(share=True, server_name="0.0.0.0", auth=("django", "1234"))
24 changes: 15 additions & 9 deletions src/llm.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
import os
import json
from openai import OpenAI # 导入OpenAI库用于访问GPT模型
from logger import LOG # 导入日志模块

class LLM:
def __init__(self):
# 创建一个OpenAI客户端实例
self.client = OpenAI()
# 从TXT文件加载提示信息
with open("prompts/report_prompt.txt", "r", encoding='utf-8') as file:
self.system_prompt = file.read()
# 配置日志文件,当文件大小达到1MB时自动轮转,日志级别为DEBUG
LOG.add("daily_progress/llm_logs.log", rotation="1 MB", level="DEBUG")
LOG.add("logs/llm_logs.log", rotation="1 MB", level="DEBUG")

def generate_daily_report(self, markdown_content, dry_run=False):
# 构建一个用于生成报告的提示文本,要求生成的报告包含新增功能、主要改进和问题修复
prompt = f"以下是项目的最新进展,根据功能合并同类项,形成一份简报,至少包含:1)新增功能;2)主要改进;3)修复问题;:\n\n{markdown_content}"

# 使用从TXT文件加载的提示信息
messages = [
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": markdown_content},
]

if dry_run:
# 如果启用了dry_run模式,将不会调用模型,而是将提示信息保存到文件中
LOG.info("Dry run mode enabled. Saving prompt to file.")
with open("daily_progress/prompt.txt", "w+") as f:
f.write(prompt)
# 格式化JSON字符串的保存
json.dump(messages, f, indent=4, ensure_ascii=False)
LOG.debug("Prompt saved to daily_progress/prompt.txt")
return "DRY RUN"

Expand All @@ -27,10 +35,8 @@ def generate_daily_report(self, markdown_content, dry_run=False):
try:
# 调用OpenAI GPT模型生成报告
response = self.client.chat.completions.create(
model="gpt-3.5-turbo", # 指定使用的模型版本
messages=[
{"role": "user", "content": prompt} # 提交用户角色的消息
]
model="gpt-4o-mini", # 指定使用的模型版本
messages=messages
)
LOG.debug("GPT response: {}", response)
# 返回模型生成的内容
Expand Down
Loading

0 comments on commit 7f76b6e

Please sign in to comment.