Merge branch 'main' into sortformer_docs

tango4j authored Jan 4, 2025
2 parents 08d0a43 + 98f0b76 commit 9e84633

Showing 73 changed files with 4,238 additions and 528 deletions.
64 changes: 45 additions & 19 deletions .github/workflows/changelog-build.yml
@@ -2,26 +2,25 @@ name: 'Changelog Build (Release)'

 on:
   workflow_dispatch:
-  push:
-    tags:
-      - '*'
+    inputs:
+      last-release-tag:
+        description: Last Git tag to start from (exclusive) (e.g. `v2.0.0`)
+        type: string
+        required: true
+      release-branch:
+        description: Release branch to build changelog on (e.g. `r2.1.0`)
+        type: string
+        required: true

 jobs:
   changelog:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - name: Checkout branch
+        uses: actions/checkout@v4
         with:
-          fetch-depth: 0 # Required due to the way Git works, without it this action won't be able to find any or the correct tags
-
-      - name: Get Previous tag
-        id: previous_tag
-        # git for-each-ref --sort=-creatordate --format '%(refname)' refs/tags ==> refs/tags/vX.Y.Z in descending order of date
-        # awk 'FNR == 2 {print substr($1, 11, length($1))}') ==> Selects the 2nd tag from the list, then strips the /refs/tags/ part of the tag
-        # set-output name=tag_name:: ==> Takes the clean tag vX.Y.Z and sets it to steps.previous_tag.outputs.tag_name
-        run: |
-          echo "::set-output name=tag_name::$(git for-each-ref --sort=-creatordate --format '%(refname)' refs/tags | awk 'FNR == 2 {print substr($1, 11, length($1))}')"
-          echo ${{ steps.previous_tag.outputs.tag_name }}
+          ref: ko3n1g/ci/fix-changelog-generator
+          fetch-depth: 0

       - name: Build Changelog
         id: github_tag
@@ -38,10 +37,37 @@ jobs:
           repo: "NeMo"
           ignorePreReleases: "false"
           failOnError: "false"
-          fromTag: ${{ steps.previous_tag.outputs.tag_name }}
-          toTag: ${{ github.ref_name || github.sha }}
+          fromTag: ${{ inputs.last-release-tag }}
+          toTag: ${{ inputs.release-branch }}

-      - name: Print Changelog
+      - name: Update changelog file
+        env:
+          RELEASE_BRANCH: ${{ inputs.release-branch }}
+          CHANGELOG: ${{ steps.github_tag.outputs.changelog }}
+        shell: bash -x -e -u -o pipefail {0}
         run: |
-          echo "${{steps.github_tag.outputs.changelog}}"
-          echo "--- DONE ---"
+          RELEASE_VERSION=${RELEASE_BRANCH#r}
+          CHANGELOG=$(echo "$CHANGELOG" | sed '/^[[:blank:]]*#/s/#/###/')
+          RELEASE_NOTES="## NVIDIA Neural Modules $RELEASE_VERSION
+          ### Detailed Changelogs:
+          $CHANGELOG"
+          printf "%s\n" "$RELEASE_NOTES" | sed '/<!-- Next changelog -->/r /dev/stdin' CHANGELOG.md > CHANGELOG.tmp.md
+          mv CHANGELOG.tmp.md CHANGELOG.md
+
+      - name: Inspect new changelog file
+        run: cat CHANGELOG.md
+
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v7
+        with:
+          commit-message: "beep boop: Update changelog"
+          title: "Update changelog for `${{ inputs.release-branch }}`"
+          signoff: true
+          sign-commits: true
+          base: main
+          branch: bot/chore/update-changelog-into-${{ inputs.release-branch }}
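
Two shell idioms in the new "Update changelog file" step are easy to misread. `sed '/^[[:blank:]]*#/s/#/###/'` demotes every Markdown heading in the generated changelog by two levels (the first `#` on each heading line becomes `###`), so the generated sections nest under the release heading. `sed '/<!-- Next changelog -->/r /dev/stdin' CHANGELOG.md` then splices the piped release notes into CHANGELOG.md immediately after the marker comment. A rough Python rendering of that splice, as a sketch only (the marker and file name come from the workflow; nothing below is part of the commit):

# Sketch of the `sed '/<!-- Next changelog -->/r /dev/stdin'` splice:
# the new notes are appended right after the marker line.
from pathlib import Path

MARKER = "<!-- Next changelog -->"

def splice_notes(changelog_text: str, notes: str) -> str:
    out = []
    for line in changelog_text.splitlines(keepends=True):
        out.append(line)
        if line.strip() == MARKER:
            out.append(notes.rstrip("\n") + "\n")  # insert below the marker
    return "".join(out)

path = Path("CHANGELOG.md")
path.write_text(splice_notes(path.read_text(), "## NVIDIA Neural Modules 2.1.0\n"))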
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -32,7 +32,7 @@ on:

 jobs:
   release:
-    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected].2
+    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected].3
     with:
       release-ref: ${{ inputs.release-ref }}
       image-name: nemo_container
413 changes: 413 additions & 0 deletions CHANGELOG.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Dockerfile.ci
@@ -53,7 +53,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T

 # Install NeMo requirements
 ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
-ARG MODELOPT_VERSION=0.19.0
+ARG MODELOPT_VERSION=0.21.0
 ARG MCORE_TAG=bd677bfb13ac2f19deaa927adc6da6f9201d66aa

 ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
2 changes: 1 addition & 1 deletion examples/llm/sft/hf_vllm.py
@@ -42,7 +42,7 @@
         triton_model_name=args.triton_model_name,
         triton_model_version=1,
         max_batch_size=64,
-        port=8000,
+        http_port=8000,
         address="0.0.0.0",
     )
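
Only the `port=` to `http_port=` keyword rename is visible in this hunk. For orientation, a hypothetical sketch of the surrounding call (the `DeployPyTriton` wrapper and the remaining details are assumptions inferred from the argument names, not confirmed by this diff):

# Hypothetical sketch -- DeployPyTriton and .deploy()/.serve() are assumptions;
# the diff only confirms the keyword is now http_port rather than port.
from nemo.deploy import DeployPyTriton

def serve_model(model, triton_model_name: str):
    nm = DeployPyTriton(
        model=model,
        triton_model_name=triton_model_name,
        triton_model_version=1,
        max_batch_size=64,
        http_port=8000,  # renamed from port= in this commit
        address="0.0.0.0",
    )
    nm.deploy()
    nm.serve()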

4 changes: 2 additions & 2 deletions examples/nlp/language_modeling/conf/megatron_gpt_ptq.yaml
@@ -32,7 +32,7 @@ model:
   activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective'

 quantization:
-  decoder_type: ${export.decoder_type} # gptnext, gpt2, llama
+  decoder_type: ${export.decoder_type} # gpt, llama
   algorithm: fp8 # null, int8_sq, fp8, int4_awq
   calib_dataset: cnn_dailymail # wikitext, cnn_dailymail, or a local dataset
   num_calib_size: 512 # number of samples used for calibration
@@ -41,7 +41,7 @@ quantization:
   enable_kv_cache: null # Enable FP8 KV cache quantization. Set to null for automatic selection.

 export:
-  decoder_type: llama # gptnext, gpt2, llama
+  decoder_type: llama # gpt, llama
   inference_tensor_parallel: 1 # Default using 1 TP for inference
   inference_pipeline_parallel: 1 # Default using 1 PP for inference
   dtype: 16 # Default precision data type for non-quantized layers: 16 or bf16
4 changes: 2 additions & 2 deletions
@@ -190,15 +190,15 @@ model:
     reduce_on_plateau: false

 quantization:
-  decoder_type: ${export.decoder_type} # gptnext, gpt2, llama
+  decoder_type: ${export.decoder_type} # gpt, llama
   algorithm: int4 # null, int8_sq, fp8, int4_awq, int4
   num_calib_size: 512 # number of samples used for calibration
   awq_block_size: 128 # block size for scaling factors (only used in AWQ algorithms)
   sq_alpha: 1.0 # alpha parameter (only used in SmoothQuant algorithms)
   enable_kv_cache: false # Enable FP8 KV cache quantization. Set to null for automatic selection.

 export:
-  decoder_type: llama # gptnext, gpt2, llama
+  decoder_type: llama # gpt, llama
   inference_tensor_parallel: 1 # Default using 1 TP for inference
   inference_pipeline_parallel: 1 # Default using 1 PP for inference
   dtype: ${trainer.precision} # Default precision data type
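
In both quantization configs above, `${export.decoder_type}` is an OmegaConf interpolation: `quantization.decoder_type` always mirrors whatever `export.decoder_type` resolves to, so the two sections cannot drift apart. A minimal sketch of the mechanism (values illustrative, not from the configs):

# Minimal sketch: OmegaConf resolves ${export.decoder_type} lazily on access.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    """
quantization:
  decoder_type: ${export.decoder_type}
export:
  decoder_type: llama
"""
)
print(cfg.quantization.decoder_type)  # llama
cfg.export.decoder_type = "gpt"
print(cfg.quantization.decoder_type)  # gpt -- the interpolation is live, not a copy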
129 changes: 129 additions & 0 deletions examples/speechlm/sft/hf.py
@@ -0,0 +1,129 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import fiddle as fdl
import torch
from lhotse.dataset.collation import collate_matrices, collate_vectors
from omegaconf import OmegaConf

from nemo import lightning as nl
from nemo.collections import speechlm
from nemo.collections.common.data.lhotse import get_lhotse_dataloader_from_config
from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer
from nemo.collections.speechlm.models import HFAutoModelForSpeechSeq2Seq

torch.set_float32_matmul_precision("medium")


class LhotseHfNeMoDataset(torch.utils.data.Dataset):
    def __init__(self, processor, tokenizer, decoder_mask_fill=-100):
        super().__init__()
        self.processor = processor
        self.tokenizer = tokenizer
        self.decoder_mask_fill = decoder_mask_fill

    def __getitem__(self, cuts):
        features = []
        for cut in cuts:
            audio = cut.load_audio()
            features.append(
                self.processor(
                    audio,
                    sampling_rate=cut.sampling_rate,
                    return_tensors="pt",
                    text=cut.supervisions[0].text,
                )
            )

        input_features = collate_matrices(tensors=[f["input_features"].squeeze(0) for f in features])
        labels = collate_vectors(tensors=[c.supervisions[0].tokens for c in cuts])
        decoder_input_ids = labels[:, :-1]
        decoder_input_ids = decoder_input_ids.masked_fill(
            decoder_input_ids == self.decoder_mask_fill, self.tokenizer.pad_id
        )
        labels = labels[:, 1:].reshape(-1)

        return {
            "input_features": input_features,
            "labels": labels,
            "decoder_input_ids": decoder_input_ids,
        }


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()

    # The model can be any one supported by AutoModelForSpeechSeq2Seq, such as
    # openai/whisper-large-v3 and facebook/s2t-small-librispeech-asr
    parser.add_argument('--model', default='openai/whisper-large-v3')
    parser.add_argument('--strategy', type=str, default='auto', choices=['auto', 'ddp', 'fsdp'])
    parser.add_argument('--devices', default=1)
    parser.add_argument('--accelerator', default='gpu', choices=['gpu'])
    parser.add_argument('--max-steps', type=int, default=100)
    parser.add_argument('--model-save-path', type=str, default=None)
    args = parser.parse_args()

    model = HFAutoModelForSpeechSeq2Seq(model_name=args.model)
    model = model.to(torch.float)
    processor = model.processor
    tokenizer = AutoTokenizer(args.model, include_special_tokens=True)

    config = OmegaConf.create(
        {
            "cuts_path": "/opt/checkpoints/lhotse/libri/libri-train-5.jsonl.gz",
            "sample_rate": 16000,
            "shuffle": True,
            "num_workers": 2,
            "batch_size": 4,
            "shuffle_buffer_size": 100,
        }
    )

    train_dataloader = get_lhotse_dataloader_from_config(
        config,
        global_rank=0,
        world_size=1,
        dataset=LhotseHfNeMoDataset(
            processor=processor,
            tokenizer=tokenizer,
        ),
        tokenizer=tokenizer,
    )

    speechlm.api.finetune(
        model=model,
        data=train_dataloader,
        trainer=nl.Trainer(
            devices=args.devices,
            max_steps=args.max_steps,
            accelerator=args.accelerator,
            strategy=args.strategy,
            precision="bf16-mixed",
            log_every_n_steps=1,
            limit_val_batches=0.0,
            num_sanity_val_steps=0,
            accumulate_grad_batches=10,
            gradient_clip_val=0.5,
            use_distributed_sampler=False,
            callbacks=[],
            logger=None,
        ),
        optim=fdl.build(speechlm.adam.pytorch_adam_with_flat_lr(lr=1e-5)),
        log=None,
    )

    if args.model_save_path is not None:
        model.save_pretrained(args.model_save_path)
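
The label handling in `LhotseHfNeMoDataset.__getitem__` above is standard teacher forcing: the decoder input is the label sequence without its last token (with `-100` fill values swapped for the pad id), while the loss targets are the sequence shifted left by one. A toy illustration with made-up ids (`-100` is the `decoder_mask_fill` default; the pad id of `0` is an assumption):

# Toy illustration of the shift-and-mask in LhotseHfNeMoDataset.__getitem__.
import torch

mask_fill, pad_id = -100, 0  # decoder_mask_fill default; pad_id assumed for illustration
labels = torch.tensor([[5, 6, 7, mask_fill, mask_fill]])  # one padded label sequence

decoder_input_ids = labels[:, :-1].masked_fill(labels[:, :-1] == mask_fill, pad_id)
targets = labels[:, 1:].reshape(-1)

print(decoder_input_ids)  # tensor([[5, 6, 7, 0]]) -- what the decoder consumes
print(targets)            # tensor([6, 7, -100, -100]) -- -100 entries are ignored by the loss

The script itself is CLI-driven, e.g. `python examples/speechlm/sft/hf.py --model openai/whisper-large-v3 --max-steps 100`; note the hard-coded `cuts_path` must point to a local Lhotse manifest.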
1 change: 0 additions & 1 deletion nemo/collections/asr/models/hybrid_rnnt_ctc_models.py
@@ -155,7 +155,6 @@ def transcribe(
             decoding_cfg.preserve_alignments = True
             self.change_decoding_strategy(decoding_cfg, decoder_type=self.cur_decoder, verbose=False)
         else:
-            return_hypotheses = False
             with open_dict(decoding_cfg):
                 decoding_cfg.compute_timestamps = False
                 decoding_cfg.preserve_alignments = False
5 changes: 5 additions & 0 deletions nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py
@@ -46,6 +46,7 @@ def __init__(
         additional_special_tokens: Optional[List] = [],
         use_fast: Optional[bool] = False,
         trust_remote_code: Optional[bool] = False,
+        include_special_tokens: bool = False,
     ):
         """
         Args:
@@ -63,6 +64,7 @@ def __init__(
             unk_token: token to use for unknown tokens
             additional_special_tokens: list of other tokens beside standard special tokens (bos, eos, pad, etc.). For example, sentinel tokens for T5 (<extra_id_0>, <extra_id_1>, etc.)
             use_fast: whether to use fast HuggingFace tokenizer
+            include_special_tokens: when True, converting text to ids will include special tokens / prompt tokens (if any), yielding self.tokenizer(text).input_ids
         """
         try:
             # this logic deals with different huggingface tokenizers having different positional args
@@ -92,6 +94,7 @@ def __init__(
                 f'Unable to instantiate HuggingFace AUTOTOKENIZER for {pretrained_model_name}. Exception: {e}'
             )

+        self.include_special_tokens = include_special_tokens
         self.original_vocab_size = len(self.tokenizer)
         special_tokens_dict = {}

@@ -220,6 +223,8 @@ def ids_to_tokens(self, ids):
         return tokens

     def text_to_ids(self, text):
+        if self.include_special_tokens:
+            return self.tokenizer(text).input_ids
         tokens = self.text_to_tokens(text)
         ids = self.tokens_to_ids(tokens)
         return ids
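
A short usage sketch of the new flag (the model name is illustrative; the behavior follows the `text_to_ids` branch above):

# Sketch: with include_special_tokens=True, text_to_ids delegates to the underlying
# HuggingFace tokenizer's __call__, which adds special tokens such as [CLS]/[SEP].
from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer

plain = AutoTokenizer("bert-base-uncased")
special = AutoTokenizer("bert-base-uncased", include_special_tokens=True)

text = "hello world"
print(plain.text_to_ids(text))    # e.g. [7592, 2088]            -- tokens only
print(special.text_to_ids(text))  # e.g. [101, 7592, 2088, 102]  -- [CLS] ... [SEP] included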