Merge branch 'ray-knowledge:main' into main

d-run · Mar 26, 2024 · 3651ae5 · 3651ae5
2 parents 332d260 + e0249df
commit 3651ae5
Show file tree

Hide file tree

Showing 74 changed files with 849 additions and 179 deletions.
diff --git a/CODEOWNERS → .github/CODEOWNERS b/CODEOWNERS → .github/CODEOWNERS
diff --git a/.github/labeler.yml b/.github/labeler.yml
@@ -0,0 +1,15 @@
+# Label name -> path globs, read by the PR labeler action
+baize:
+  - 'docs/zh/docs/baize/**/*'
+
+dak:
+  - 'docs/zh/docs/dak/**/*'
+
+dmc:
+  - 'docs/zh/docs/dmc/**/*'
+
+dtx:
+  - 'docs/zh/docs/dtx/**/*'
+
+dev:
+  - 'scripts/**/*'
+  - '.github/**/*'
diff --git a/.github/workflows/auto-label-pr.yaml b/.github/workflows/auto-label-pr.yaml
@@ -0,0 +1,18 @@
+name: Auto Labels PR
+
+# Usage: https://github.com/marketplace/actions/labeler
+
+on:
+  - pull_request_target
+
+jobs:
+  triage:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - uses: actions/labeler@v4
+        with:
+          repo-token: '${{ secrets.GITHUB_TOKEN }}'
+          configuration-path: .github/labeler.yml  # update the labeling rules in this file
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -0,0 +1,74 @@
+# Workflow settings: name, triggers, shared environment and concurrency
+
+name: deploy-for-main
+
+# Controls when the workflow will run
+on:
+  # Triggers the workflow on push events, but only for the main branch
+  push:
+    branches: [main]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Values taken from repository secrets and exposed as environment variables
+env:
+  UCLOUD_PUBLICKEY: ${{ secrets.UCLOUD_PUBLICKEY }}
+  UCLOUD_PRIVATEKEY: ${{ secrets.UCLOUD_PRIVATEKEY }}
+  UCLOUD_REGION: ${{ secrets.UCLOUD_REGION }}
+  UCLOUD_BUCKET: ${{ secrets.UCLOUD_BUCKET }}
+  CI: 1
+
+# Runs are grouped by workflow ref; a newer run cancels the one in progress
+concurrency:
+  group: ${{ github.workflow_ref }}
+  cancel-in-progress: true
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # This workflow contains a single job called "deploy"
+  deploy:
+    # The type of runner that the job will run on
+    runs-on: ubuntu-latest
+
+    # Steps represent a sequence of tasks that will be executed as part of the job
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Required for mkdocs to be able to display pages last update info
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      # Add ssh private key
+      - name: Setup SSH
+        uses: MrSquaare/ssh-setup-action@v1
+        with:
+          host: github.com
+          private-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+      # Runs a single command using the runner's shell
+      - run: pip install -r requirements.txt
+
+      # Install mkdocs-material-insiders (cloned over SSH using the key configured above)
+      - run: git clone git@github.com:DaoCloud/mkdocs-material-insiders.git mkdocs-material
+      - run: pip install -e mkdocs-material
+
+      # add custom plugin with pdf support
+      # - run: pip install git+https://github.com/SAMZONG/mkdocs-with-pdf-support-material-v8
+
+      # build docs
+      - run: pwd
+      - run: mkdocs build -f docs/zh/mkdocs.yml -d ../../public/
+
+      # upload to ucloud bucket
+      # A literal block scalar keeps the newlines, so each trailing backslash is a
+      # real shell line continuation instead of an escaped space.
+      - run: |
+          cd public && pwd && python ../scripts/upload-ucloud.py \
+            public_key="$UCLOUD_PUBLICKEY" \
+            private_key="$UCLOUD_PRIVATEKEY" \
+            region="$UCLOUD_REGION" \
+            bucket="$UCLOUD_BUCKET"
+
+      # refresh docs site cdn cache
+      - run: |
+          python scripts/refresh_cdn_cache.py \
+            publickey="$UCLOUD_PUBLICKEY" \
+            privatekey="$UCLOUD_PRIVATEKEY"
diff --git a/.github/workflows/size-labeler.yaml b/.github/workflows/size-labeler.yaml
@@ -0,0 +1,33 @@
+name: Size labeler
+
+on:
+  - pull_request_target
+
+jobs:
+  size-labeler:
+    name: Label the PR size
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      # Each *_max_size is the upper bound (in changed lines) for its label.
+      # NOTE(review): confirm that xl_max_size and the xll_* inputs are honored by
+      # this action version; inputs an action does not declare are ignored.
+      - uses: codelytv/pr-size-labeler@v1
+        with:
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+          xs_label: 'size/xs'
+          xs_max_size: '9'
+          s_label: 'size/s'
+          s_max_size: '29'
+          m_label: 'size/m'
+          m_max_size: '99'
+          l_label: 'size/l'
+          l_max_size: '499'
+          xl_label: 'size/xl'
+          xl_max_size: '999'
+          xll_label: 'size/xll'
+          xll_max_size: '1999'
+          fail_if_xll: 'false'
+          message_if_xll: >
+            This PR exceeds the recommended size of 2000 lines.
+            Please make sure you are NOT addressing multiple issues with one PR.
+            Note this PR might be rejected due to its size.
diff --git a/README.md b/README.md
@@ -1,7 +1,11 @@
-# DaoCloud d.run Documentation
+# d.run
 
 English | [中文](./README_zh.md)
 
+d.run = DaoCloud Runs Intelligence
+
+d.run unleashes your compute power
+
 d.run is a comprehensive AIGC platform that allows you to enhance your LLM models and
 leverage your organization's knowledge base. By integrating AI generation engines and plugins
 such as cloud native AI engine, DataTunerX, DaoCloud AIGC Knowledge, and infmonkeys,

diff --git a/README_zh.md b/README_zh.md
@@ -1,7 +1,11 @@
-# DaoCloud d.run 文档站
+# d.run
 
 [English](./README.md) | 中文
 
+d.run = DaoCloud Runs Intelligence
+
+d.run 让算力更自由
+
 d.run 是一个全方位的人工智能生成和增强平台，可以帮助您优化您的大语言模型，并利用您的知识库。
 通过整合 AI 生成引擎和各种插件，比如云原生 AI 引擎 baize、DataTunerX、DaoCloud AIGC 知识库和 infmonkeys 等插件，
 d.run 可以用来创建、训练和推断各种人工智能产品。这个 repo 包含了 d.run 网站和文档的源文件。

diff --git a/docs/zh/docs/baize/images/archi.png b/docs/zh/docs/baize/images/archi.png
diff --git a/docs/zh/docs/baize/index.md b/docs/zh/docs/baize/index.md
@@ -0,0 +1,18 @@
+# 什么是模型开发
+
+d.run 模型开发是 DaoCloud 推出的基于云原生操作系统的 AI 算力平台，能够提供软硬一体的 AI 智算体验，
+整合异构算力，优化 GPU 性能，实现算力资源统一调度和运营，最大化算力效用并降低算力开销，
+并且还提供了优化的 AI 开发框架，简化 AI 开发和部署，加速推动各行业的 AI 应用场景落地。
+
+## 功能特性
+
+- 算力资源全托管：依托于 [DCE 5.0](https://docs.daocloud.io/)，提供强大的基础设施能力，支持超大规模算力集群、异构 GPU 等一站式托管，并提供一系列如 vGPU 等软硬一体加速方案。
+- 数据编排：支持模型开发过程中数据管理与编排能力，提供如数据集管理、多数据源接入、数据集预热等功能，从底层容器存储引擎进行优化，保证数据的高效与稳定。
+- 开发环境管理：满足 MLOps 和 LLMOps 工程师对开发环境的需求，提供多种开发环境，包括 JupyterLab、VSCode(进行中) 等，支持自定义开发环境，一键挂载各种 GPU、数据集等资源。
+- 任务管理：支持训练任务的全生命周期管理，提供多种快速创建任务的方式；支持 Pytorch、TensorFlow、PaddlePaddle 等主流任务框架，天然支持单机、分布式、多节点、多卡等多种类任务调度。
+- GPU 管理：可以查看全部的 GPU 资源和 GPU 使用情况，支持 GPU 中当前和历史运行的任务情况查看，方便进行 GPU 压力评估。
+- 队列管理：支持创建队列，并将队列与工作空间进行关联，保障在各个集群中的队列资源的统筹与隔离。
+
+**逻辑架构图**
+
+![架构图](./images/archi.png)
diff --git a/docs/zh/docs/baize/intro/index.md b/docs/zh/docs/baize/intro/index.md
diff --git a/docs/zh/docs/dak/images/app-center.png b/docs/zh/docs/dak/images/app-center.png
diff --git a/docs/zh/docs/dak/images/chat01.png b/docs/zh/docs/dak/images/chat01.png
diff --git a/docs/zh/docs/dak/images/chat02.png b/docs/zh/docs/dak/images/chat02.png
diff --git a/docs/zh/docs/dak/index.md b/docs/zh/docs/dak/index.md
@@ -0,0 +1,12 @@
+---
+hide:
+  - toc
+---
+
+# 什么是智能应用
+
+d.run 智能应用是基于知识库构建的应用中心，包括 AI 对话、语料库、插件设置、数据分析、反馈和系统配置等。
+
+您在这里可以创建应用和插件，为 AI 对话构建语料库，然后通过数据分析来优化用户体验。
+
+![应用中心](./images/app-center.png)
diff --git a/docs/zh/docs/dak/intro/index.md b/docs/zh/docs/dak/intro/index.md
diff --git a/docs/zh/docs/dak/user-guide/System-Config/System-Config.md b/docs/zh/docs/dak/user-guide/System-Config/System-Config.md
diff --git a/docs/zh/docs/dak/user-guide/app-center/conversation.md b/docs/zh/docs/dak/user-guide/app-center/conversation.md
@@ -0,0 +1,50 @@
+# 应用对话说明
+
+**对话** 是 ChatGPT 出现后最常见的获取资讯方式。
+在 d.run 中，一个应用被发布之后，即可使用对话功能。
+您可以在对话中关联语料库后自由提问，可以随时查阅历史记录，另外导航栏还列出了最近使用的 3 组对话。
+
+## 日常对话
+
+1. 在左侧导航栏，点击 **应用中心** ，在已发布应用的右下角，点击对话图标。
+
+    ![点击对话图标](../../images/chat01.png)
+
+2. 在对话框中输入问题，点击 **发送** ，或敲击回车键，进行对话。对话框的输入字符限制为 6000 个字符。
+
+    ![聊天](../../images/chat02.png)
+
+## 对话管理
+
+![manage](../images/manage.png)
+
+- **置顶** 、 **重命名** 和 **删除** ：在历史记录窗格中，点击某一条对话右侧的 **⋮** ，可以置顶、重命名和删除对话
+- **清空** ：在对话页面左上角，点击 🧹 图标，可以 **清空** 对话
+- **关联语料库** ：在输入框左下角，点击 📖 图标，可以关联语料库
+
+    语料库的选择： **聊天应用** 会在选中的语料库中去匹配最相似的语料作为参考，回答您的问题。
+    显示操作成功表示已经选中语料库。也可以取消选中某个或某些语料库。
+
+    !!! info
+
+        如果无法选择和更改关联的语料库，只能添加语料库。中文的应用只可以选择中文语料库，英文的应用只能选择英文语料库。
+
+- **停止对话** ：提问后，点击输入框右侧的图标，可以在回答过程中停止对话，让助手停止输出内容
+
+## 几个实用图标
+
+![page-function](../images/page-function.png)
+
+- **评价** ：可以点赞 👍 或点踩 👎 某条回答，取决于您对回答内容是否满意。
+- **复制** ：可以复制某条回答。
+- **重置** ：可以重置、重新生成某一条回答。
+
+    !!! info "随机度"
+
+        对于重置的内容，可以通过管理员调节模型的 **随机度** 来控制聊天助手多次回答的一致性。
+
+        若随机度高，聊天助手多次回答的结果可能各不相同。
+        如果要求回答的准确度高，可以降低随机度，这样聊天助手每次生成的结果会接近一致。
+
+- **删除** ：点击垃圾桶 :material-delete: 图标，可以删除某条回答，此后在聊天助手上下文对话时，将不会对删除的对话有记忆。
+- **改进意见** ：点击某条回答的最后一个图标 :bookmark_tabs:，可以根据聊天助手回答结果的好坏提交反馈意见。
diff --git a/docs/zh/docs/dak/user-guide/app-center/create-app.md b/docs/zh/docs/dak/user-guide/app-center/create-app.md
@@ -9,7 +9,7 @@ hide:
 
 1. 在 **应用中心** 页面中，点击 **创建** 按钮。
 
-    ![应用中心](images/app-center.png)
+    ![应用中心](../../images/app-center.png)
 
 2. 填写应用基础信息
 

diff --git a/docs/zh/docs/dak/user-guide/app-center/images/app-center.png b/docs/zh/docs/dak/user-guide/app-center/images/app-center.png
diff --git a/docs/zh/docs/dak/user-guide/conversation/conversation.md b/docs/zh/docs/dak/user-guide/conversation/conversation.md
diff --git a/docs/zh/docs/dak/user-guide/corpus/create-corpus.md b/docs/zh/docs/dak/user-guide/corpus/create-corpus.md
@@ -1,17 +1,20 @@
+---
+hide:
+  - toc
+---
+
 # 如何创建语料库
 
-## 操作步骤
 1. 在 **我的语料** 页面中，点击 **创建语料** 按钮。
 
 2. 参考下列要求填写语料库基本信息，并点击 **下一步** 。
 
-* 语料库名称：名称包含大小写字母、数字和符号。
-
-* 向量化模型服务：可选择为`bge-large-zh`和`bge-large-en`
+    * 语料库名称：名称包含大小写字母、数字和符号
 
-* 访问级别：可选择为公开/私有
+    * 向量化模型服务：可选择 `bge-large-zh` 和 `bge-large-en`
 
-* 简介：简要描述语料库中的内容信息，简介可为中英文，数字，限制400个字符
+    * 访问级别：可选择公开/私有
 
-![创建语料库](./images/create-corpus.png)
+    * 简介：简要描述语料库中的内容信息，可包含中英文、数字，最多 400 个字符
 
+    ![创建语料库](./images/create-corpus.png)