Merge branch 'main' into feature/pipeline

hpcaitech · Aug 14, 2023 · 6990477 · 6990477
2 parents d4a3a10 + d86ddd9
commit 6990477
Show file tree

Hide file tree

Showing 172 changed files with 4,185 additions and 5,539 deletions.
diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml
@@ -208,7 +208,7 @@ jobs:
 
  - name: Execute Unit Testing
  run: |
- CURL_CA_BUNDLE="" PYTHONPATH=$PWD pytest --testmon --testmon-cov=. tests/
+ CURL_CA_BUNDLE="" PYTHONPATH=$PWD pytest --testmon --testmon-cov=. --durations=10 tests/
  env:
  DATA: /data/scratch/cifar-10
  NCCL_SHM_DISABLE: 1

diff --git a/.github/workflows/build_on_schedule.yml b/.github/workflows/build_on_schedule.yml
@@ -3,7 +3,7 @@ name: Build on Schedule
 on:
  schedule:
  # run at 00:00 UTC every day ('0 0 * * *'); use '0 0 * * 0' to run on Sundays only
- - cron:  '0 0 * * *'
+ - cron: "0 0 * * *"
  workflow_dispatch:
 
 jobs:
@@ -60,7 +60,7 @@ jobs:
  - name: Unit Testing
  if: steps.check-avai.outputs.avai == 'true'
  run: |
- PYTHONPATH=$PWD pytest tests
+ PYTHONPATH=$PWD pytest --durations=0 tests
  env:
  DATA: /data/scratch/cifar-10
  LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64

diff --git a/.github/workflows/compatiblity_test_on_dispatch.yml b/.github/workflows/compatiblity_test_on_dispatch.yml
@@ -72,7 +72,7 @@ jobs:
  ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
  - name: Download cub for CUDA 10.2
  run: |
- CUDA_VERSION=$(cat $CUDA_HOME/version.txt | grep "CUDA Version" | awk '{print $NF}' | cut -d. -f1,2)
+ CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
 
  # check if it is CUDA 10.2
  # download cub

diff --git a/.github/workflows/compatiblity_test_on_pr.yml b/.github/workflows/compatiblity_test_on_pr.yml
@@ -66,7 +66,7 @@ jobs:
  ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
  - name: Download cub for CUDA 10.2
  run: |
- CUDA_VERSION=$(cat $CUDA_HOME/version.txt | grep "CUDA Version" | awk '{print $NF}' | cut -d. -f1,2)
+ CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
 
  # check if it is CUDA 10.2
  # download cub

diff --git a/.github/workflows/compatiblity_test_on_schedule.yml b/.github/workflows/compatiblity_test_on_schedule.yml
@@ -61,6 +61,18 @@ jobs:
  with:
  ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
 
+ - name: Download cub for CUDA 10.2
+ run: |
+ CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
+
+ # check if it is CUDA 10.2
+ # download cub
+ if [ "$CUDA_VERSION" = "10.2" ]; then
+ wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
+ unzip 1.8.0.zip
+ cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
+ fi
+
  - name: Install Colossal-AI
  run: |
  pip install -v --no-cache-dir .

diff --git a/.github/workflows/cuda_ext_check_before_merge.yml b/.github/workflows/cuda_ext_check_before_merge.yml
@@ -37,6 +37,18 @@ jobs:
  - name: Install PyTorch
  run: eval ${{ matrix.build.torch_command }}
 
+ - name: Download cub for CUDA 10.2
+ run: |
+ CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')
+
+ # check if it is CUDA 10.2
+ # download cub
+ if [ "$CUDA_VERSION" = "10.2" ]; then
+ wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
+ unzip 1.8.0.zip
+ cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
+ fi
+
  - name: Build
  run: |
  CUDA_EXT=1 pip install -v .
diff --git a/.github/workflows/run_chatgpt_examples.yml b/.github/workflows/run_chatgpt_examples.yml
@@ -43,7 +43,9 @@ jobs:
  run: |
  cd applications/Chat
  rm -rf ~/.cache/colossalai
- ./examples/test_ci.sh
+ ./tests/test_inference.sh
+ ./tests/test_benchmarks.sh
+ ./tests/test_train.sh
  env:
  NCCL_SHM_DISABLE: 1
  MAX_JOBS: 8

diff --git a/README.md b/README.md
@@ -25,14 +25,15 @@
 </div>
 
 ## Latest News
+* [2023/07] [HPC-AI Tech Raises 22 Million USD in Series A Funding](https://www.hpc-ai.tech/blog/hpc-ai-tech-raises-22-million-usd-in-series-a-funding-to-fuel-team-expansion-and-business-growth)
+* [2023/07] [65B Model Pretraining Accelerated by 38%, Best Practices for Building LLaMA-Like Base Models Open-Source](https://www.hpc-ai.tech/blog/large-model-pretraining)
 * [2023/03] [ColossalChat: An Open-Source Solution for Cloning ChatGPT With a Complete RLHF Pipeline](https://medium.com/@yangyou_berkeley/colossalchat-an-open-source-solution-for-cloning-chatgpt-with-a-complete-rlhf-pipeline-5edf08fb538b)
 * [2023/03] [Intel and Colossal-AI Partner to Deliver Cost-Efficient Open-Source Solution for Protein Folding Structure Prediction](https://www.hpc-ai.tech/blog/intel-habana)
 * [2023/03] [AWS and Google Fund Colossal-AI with Startup Cloud Programs](https://www.hpc-ai.tech/blog/aws-and-google-fund-colossal-ai-with-startup-cloud-programs)
 * [2023/02] [Open Source Solution Replicates ChatGPT Training Process! Ready to go with only 1.6GB GPU Memory](https://www.hpc-ai.tech/blog/colossal-ai-chatgpt)
 * [2023/01] [Hardware Savings Up to 46 Times for AIGC and Automatic Parallelism](https://medium.com/pytorch/latest-colossal-ai-boasts-novel-automatic-parallelism-and-offers-savings-up-to-46x-for-stable-1453b48f3f02)
 * [2022/11] [Diffusion Pretraining and Hardware Fine-Tuning Can Be Almost 7X Cheaper](https://www.hpc-ai.tech/blog/diffusion-pretraining-and-hardware-fine-tuning-can-be-almost-7x-cheaper)
 * [2022/10] [Use a Laptop to Analyze 90% of Proteins, With a Single-GPU Inference Sequence Exceeding 10,000](https://www.hpc-ai.tech/blog/use-a-laptop-to-analyze-90-of-proteins-with-a-single-gpu-inference-sequence-exceeding)
-* [2022/09] [HPC-AI Tech Completes $6 Million Seed and Angel Round Fundraising](https://www.hpc-ai.tech/blog/hpc-ai-tech-completes-6-million-seed-and-angel-round-fundraising-led-by-bluerun-ventures-in-the)
 
 ## Table of Contents
 <ul>
@@ -49,6 +50,7 @@
  <li>
  <a href="#Parallel-Training-Demo">Parallel Training Demo</a>
  <ul>
+  <li><a href="#LLaMA">LLaMA</a></li>
   <li><a href="#GPT-3">GPT-3</a></li>
   <li><a href="#GPT-2">GPT-2</a></li>
   <li><a href="#BERT">BERT</a></li>
@@ -216,6 +218,15 @@ Acceleration of [AlphaFold Protein Structure](https://alphafold.ebi.ac.uk/)
 
 ## Parallel Training Demo
 
+### LLaMA
+<p align="center">
+<img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/examples/images/LLaMA_pretraining.png" width=600/>
+</p>
+
+- 65-billion-parameter large model pretraining accelerated by 38%
+[[code]](https://github.com/hpcaitech/ColossalAI/tree/example/llama/examples/language/llama)
+[[blog]](https://www.hpc-ai.tech/blog/large-model-pretraining)
+
 ### GPT-3
 <p align="center">
 <img src="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/GPT3-v5.png" width=700/>
@@ -452,6 +463,7 @@ To cite this project, you can use the following BibTeX citation.
 }
 ```
 
-Colossal-AI has been accepted as official tutorial by top conferences [SC](https://sc22.supercomputing.org/), [AAAI](https://aaai.org/Conferences/AAAI-23/), [PPoPP](https://ppopp23.sigplan.org/), [CVPR](https://cvpr2023.thecvf.com/), [ISC](https://www.isc-hpc.com/), etc.
+Colossal-AI has been accepted as an official tutorial by top conferences [NeurIPS](https://nips.cc/), [SC](https://sc22.supercomputing.org/), [AAAI](https://aaai.org/Conferences/AAAI-23/),
+[PPoPP](https://ppopp23.sigplan.org/), [CVPR](https://cvpr2023.thecvf.com/), [ISC](https://www.isc-hpc.com/), [NVIDIA GTC](https://www.nvidia.com/en-us/on-demand/session/gtcspring23-S51482/), etc.
 
 <p align="right">(<a href="#top">back to top</a>)</p>
diff --git a/applications/Chat/.gitignore b/applications/Chat/.gitignore
@@ -145,4 +145,4 @@ docs/.build
 # wandb log
 example/wandb/
 
-examples/awesome-chatgpt-prompts/
+examples/awesome-chatgpt-prompts/
diff --git a/applications/Chat/coati/dataset/__init__.py b/applications/Chat/coati/dataset/__init__.py
@@ -1,9 +1,10 @@
 from .prompt_dataset import PromptDataset
 from .reward_dataset import HhRlhfDataset, RmStaticDataset
-from .sft_dataset import DataCollatorForSupervisedDataset, SFTDataset, SupervisedDataset
+from .sft_dataset import SFTDataset, SupervisedDataset
 from .utils import is_rank_0
 
 __all__ = [
- 'RmStaticDataset', 'HhRlhfDataset', 'is_rank_0', 'SFTDataset', 'SupervisedDataset',
- 'DataCollatorForSupervisedDataset', 'PromptDataset'
+ 'RmStaticDataset', 'HhRlhfDataset',
+ 'SFTDataset', 'SupervisedDataset',
+ 'PromptDataset', 'is_rank_0',
 ]
diff --git a/applications/Chat/coati/dataset/conversation.py b/applications/Chat/coati/dataset/conversation.py
@@ -0,0 +1,87 @@
+# Copyright 2023 lm-sys@FastChat
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import dataclasses
+from enum import Enum, auto
+from typing import List
+
+
+class SeparatorStyle(Enum):
+ ADD_EOS_TOKEN = auto()
+
+
+@dataclasses.dataclass
+class Conversation:
+ system: str
+ roles: List[str]
+ messages: List[List[str]]
+ offset: int
+ sep_style: SeparatorStyle = SeparatorStyle.ADD_EOS_TOKEN
+ sep: str = "</s>"
+
+ skip_next: bool = False
+
+ def get_prompt(self):
+ if self.sep_style == SeparatorStyle.ADD_EOS_TOKEN:
+ ret = self.system
+ for role, message in self.messages:
+ if message:
+ ret += role + ": " + message + self.sep
+ else:
+ ret += role + ": "
+ return ret
+ else:
+ raise ValueError(f"Invalid style: {self.sep_style}")
+
+ def append_message(self, role, message):
+ self.messages.append([role, message])
+
+ def to_gradio_chatbot(self):
+ ret = []
+ for i, (role, msg) in enumerate(self.messages[self.offset:]):
+ if i % 2 == 0:
+ ret.append([msg, None])
+ else:
+ ret[-1][-1] = msg
+ return ret
+
+ def copy(self):
+ return Conversation(system=self.system,
+ roles=self.roles,
+ messages=[[x, y] for x, y in self.messages],
+ offset=self.offset,
+ sep_style=self.sep_style,
+ sep=self.sep)
+
+ def dict(self):
+ return {
+ "system": self.system,
+ "roles": self.roles,
+ "messages": self.messages,
+ "offset": self.offset,
+ "sep": self.sep
+ }
+
+
+conv = Conversation(
+ system="A chat between a curious human and an artificial intelligence assistant. "
+ "The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",
+ roles=("Human", "Assistant"),
+ messages=(),
+ offset=0,
+ sep_style=SeparatorStyle.ADD_EOS_TOKEN,
+ sep="</s>",
+)
+
+default_conversation = conv
diff --git a/applications/Chat/coati/dataset/prompt_dataset.py b/applications/Chat/coati/dataset/prompt_dataset.py
@@ -1,20 +1,13 @@
-import copy
-import random
 from collections import defaultdict
-from dataclasses import dataclass, field
-from typing import Callable, Dict, Sequence
+from typing import Dict
 
 import torch
-import torch.distributed as dist
 import transformers
 from torch.utils.data import Dataset
-from tqdm import tqdm
 
 from colossalai.logging import get_dist_logger
 
-from .utils import is_rank_0, jload
-
-logger = get_dist_logger()
+from .utils import jload
 
 
 class PromptDataset(Dataset):
@@ -27,12 +20,13 @@ def __init__(self,
  max_length: int = 96):
  super(PromptDataset, self).__init__()
  self.keyed_prompt = defaultdict(list)
- logger.info("Loading data...")
+ self.logger = get_dist_logger()
+ self.logger.info("Loading data...")
  list_data_dict = jload(data_path)
- logger.info(f"Loaded {len(list_data_dict)} examples.")
+ self.logger.info(f"Loaded {len(list_data_dict)} examples.")
 
  if max_datasets_size is not None:
- logger.info(f"Limiting dataset to {max_datasets_size} examples.")
+ self.logger.info(f"Limiting dataset to {max_datasets_size} examples.")
  list_data_dict = list_data_dict[:max_datasets_size]
 
  instructions = [data_dict["instruction"] for data_dict in list_data_dict]