From 0f2f50d797ec64ed1ae2f9869b485d7039f16a52 Mon Sep 17 00:00:00 2001
From: han_yifeng
Date: Tue, 28 Nov 2023 16:22:11 +0800
Subject: [PATCH 1/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8B=E8=84=9A?=
 =?UTF-8?q?=E6=9C=AC=E4=B8=8EREADME=E4=B8=8A=E5=BA=93?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/cv/audio/Ecapa_Tdnn/README.md    | 176 ++++++++++++++++++
 .../audio/Ecapa_Tdnn/export_torch_aie_ts.py   |  60 ++++++
 .../built-in/cv/audio/Ecapa_Tdnn/model_pt.py  |  57 ++++++
 .../built-in/cv/audio/Ecapa_Tdnn/pt_val.py    |  94 ++++++++++
 .../cv/audio/Ecapa_Tdnn/pytorch2onnx.py       |  53 ++++++
 5 files changed, 440 insertions(+)
 create mode 100644 AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
 create mode 100644 AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
 create mode 100644 AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py
 create mode 100644 AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py
 create mode 100644 AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2onnx.py

diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
new file mode 100644
index 000000000..820314ee0
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
@@ -0,0 +1,176 @@
+# ECAPA_TDNN Model - Inference Guide
+
+- [Overview](#ZH-CN_TOPIC_0000001172161501)
+
+  - [Input/Output Data](#section540883920406)
+
+- [Inference Environment Setup](#ZH-CN_TOPIC_0000001126281702)
+
+- [Quick Start](#ZH-CN_TOPIC_0000001126281700)
+
+  - [Getting the Source Code](#section4622531142816)
+  - [Preparing the Dataset](#section183221994411)
+  - [Model Inference](#section741711594517)
+
+- [Performance & Accuracy](#ZH-CN_TOPIC_0000001172201573)
+
+  ******
+
+
+# Overview
+
+ECAPA-TDNN builds on recent trends in face verification and computer vision, introducing several improvements over the traditional TDNN, including 1D SE blocks, multi-layer feature aggregation (MFA), and channel- and context-dependent statistics pooling.
+
+- Reference implementation:
+
+  ```shell
+  url=https://github.com/Joovvhan/ECAPA-TDNN.git
+  ```
+
+## Input/Output Data
+
+- Input data
+
+  | Input | Data type | Shape | Format |
+  | -------- |----------------------|--------| ------------ |
+  | input | FLOAT32 | batchsize x 80 x 200 | ND |
+
+- Output data
+
+  | Output | Data type | Shape | Format |
+  |--------| -------- |--------------------|--------|
+  | output1 | FLOAT32 | batchsize x 192 | ND |
+  | output2 | FLOAT32 | batchsize x 200 x 1536 | ND |
+
+
+# Inference Environment Setup
+
+- The model requires the following dependencies.
+
+  **Table 1** Version compatibility
+
+
+  | Dependency | Version | Setup guide |
+  |--------| ------- | ----------------------------------------------------------------------------------------------------- |
+  | Firmware & driver | 23.0.0 | [PyTorch inference environment setup](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/pies) |
+  | CANN | 7.0.0 | - |
+  | Python | 3.9.0 | - |
+  | PyTorch | 2.0.1 | - |
+  | Note: chip type is Ascend310P3 | \ | \ |
+
+
+# Quick Start
+
+## Getting the Source Code
+
+1. Get the source code.
+
+   ```
+   # Get the inference deployment code
+   git clone https://gitee.com/ascend/ModelZoo-PyTorch.git
+   cd ModelZoo-PyTorch/ACL_PyTorch/contrib/audio/Ecapa_Tdnn/ECAPA_TDNN
+   # Get the model source code
+   git clone --recursive https://github.com/Joovvhan/ECAPA-TDNN.git
+   mv ECAPA-TDNN ECAPA_TDNN
+   export PYTHONPATH=$PYTHONPATH:./ECAPA_TDNN
+   export PYTHONPATH=$PYTHONPATH:./ECAPA_TDNN/tacotron2
+   ```
+
+2. Install dependencies.
+
+   ```
+   pip install -r requirements.txt
+   ```
+
+## Preparing the Dataset
+
+1. Get the raw dataset. (To unpack the archives, see tar -xvf *.tar and unzip *.zip.)
+
+Download the test split of the VoxCeleb1 dataset yourself (the training set is not needed) and upload it to the server; it must sit in the same directory as preprocess.py. The directory structure looks like this:
+   ```
+   VoxCeleb1
+   ├── id10270
+       ├── 1zcIwhmdeo4
+           ├── 00001.wav
+           ├── ...
+   ├── id10271
+       ├── ...
+   ```
+
+2. Preprocess the data: convert the raw dataset into the model's input format.
+
+   In the current working directory, run the command below, where VoxCeleb1 is the dataset's relative path, input/ is the relative path for the model's input data, and speaker/ is the relative path for the label files used later in postprocessing:
+   ```
+   python3 preprocess.py VoxCeleb1 input/ speaker/
+   ```
+
+## Model Inference
+1. Get the weight file.
+   ```
+   wget https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/model/1_PyTorch_PTH/Ecapa_tdnn/PTH/checkpoint.zip
+   unzip checkpoint.zip
+   ```
+   Get the baseline accuracy as a reference for the accuracy comparison; checkpoint is the weight file's relative path and VoxCeleb1 is the dataset's relative path:
+   ```
+   python3 get_originroc.py checkpoint VoxCeleb1
+   ```
+
+2. Generate the traced model (ts)
+   ```
+   Replace the file of the same name in the original project with the pytorch2onnx.py from this README's directory
+   python3 pytorch2onnx.py checkpoint ecapa_tdnn.onnx
+   ```
+
+3. Save the compiled, optimized model (optional: the inference script below performs compilation itself)
+
+   ```
+   python export_torch_aie_ts.py --batch_size=1
+   ```
+   Parameter description:
+   ```
+   --torch_script_path: path to the ts model before compilation
+   --soc_version: SoC version
+   --batch_size: model batch size
+   --save_path: where to store the compiled model
+   ```
+
+
+4. Run the inference script (includes performance measurement)
+
+   Put pt_val.py under ./yolov3 and model_pt.py under ./yolov3/common/util
+   ```
+   python pt_val.py --batch_size=64 --model="ecapa_tdnn_torch_aie_bs64.pt"
+   ```
+   Parameter description:
+   ```
+   --data_path: root directory of the validation data, default "VoxCeleb1"
+   --soc_version: SoC version
+   --model: input model path
+   --need_compile: whether the loaded model still needs to be compiled (omit this flag for models produced by export_torch_aie_ts.py)
+   --batch_size: model batch size
+   --device_id: device id
+   ```
+# Performance & Accuracy
+
+  Accuracy check
+  ```
+  python postprocess.py result/output_bs1 speaker
+  ```
+Parameter description:
+```
+  --result/output_bs1: path to the inference results
+  --speaker: path to the label data
+  --4: batch size
+  --4648: total sample count
+```
+
+**Table 2** ecapa_tdnn accuracy
+
+| batchsize | AIE perf | AIE accuracy |
+|------------------------------------------------|-----------|--------|
+| bs1 | 894.4216 | 0.9856 |
+| bs4 | 2674.6597 | 0.9865 |
+| bs8 | 3686.8627 | / |
+| bs16 | 692.3013 | / |
+| bs32 | 1358.1562 | / |
+| bs64 | 2645.4167 | / |
diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
new file mode 100644
index 000000000..535e47a3e
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
@@ -0,0 +1,60 @@
+# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
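+
+# export_torch_aie_ts.py: load a traced TorchScript ECAPA-TDNN model, compile
+# it ahead of time with torch_aie for the target Ascend SoC, and save the
+# compiled module under --save_path.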
+
+import sys
+import os
+import argparse
+import torch
+import torch_aie
+from torch_aie import _enums
+
+def export_torch_aie(opt_args):
+    trace_model = torch.jit.load(opt_args.torch_script_path)
+    trace_model.eval()
+
+    torch_aie.set_device(0)
+    inputs = []
+    inputs.append(torch_aie.Input((opt_args.batch_size, 80, 10)))
+    torchaie_model = torch_aie.compile(
+        trace_model,
+        inputs=inputs,
+        precision_policy=_enums.PrecisionPolicy.FP16,
+        # precision_policy=_enums.PrecisionPolicy.PREF_FP32,
+        truncate_long_and_double=True,
+        require_full_compilation=False,
+        allow_tensor_replace_int=False,
+        min_block_size=3,
+        torch_executed_ops=[],
+        soc_version=opt_args.soc_version,
+        optimization_level=0)
+    suffix = os.path.splitext(opt_args.torch_script_path)[-1]
+    saved_name = os.path.basename(opt_args.torch_script_path).split('.')[0] + f"_torch_aie_bs{opt_args.batch_size}" + suffix
+    torchaie_model.save(os.path.join(opt_args.save_path, saved_name))
+    print("torch aie ecapa_tdnn compiled done. saved model is ", os.path.join(opt_args.save_path, saved_name))
+
+def parse_opt():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--torch_script_path', type=str, default='./ecapa_tdnn.torchscript.pt', help='trace model path')
+    parser.add_argument('--soc_version', type=str, default='Ascend310P3', help='soc version')
+    parser.add_argument('--batch_size', type=int, default=1, help='batch size')
+    parser.add_argument('--save_path', type=str, default='./', help='compiled model path')
+    opt_args = parser.parse_args()
+    return opt_args
+
+def main(opt_args):
+    export_torch_aie(opt_args)
+
+if __name__ == '__main__':
+    opt = parse_opt()
+    main(opt)
diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py
new file mode 100644
index 000000000..f54171792
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py
@@ -0,0 +1,57 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
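+
+# model_pt.py: inference helpers. forward_nms_script() drives the dataloader
+# through the compiled model; pt_infer() runs one batch on an NPU stream and
+# records per-batch latency, treating the first 5 iterations as warmup.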
+ +import torch +import torch_aie +import numpy as np +import time +import os +import cv2 +from tqdm import tqdm +from torch_aie import _enums + +from pathlib import Path + + +def forward_nms_script(model, dataloader, batchsize, device_id): + pred_results = [] + inference_time = [] + loop_num = 0 + for img, _, _ in tqdm(dataloader): + img = img.contiguous() + + # pt infer + result, inference_time = pt_infer(model, img, device_id, loop_num, inference_time) + pred_results.append(result) + loop_num += 1 + # print(batchsize, inference_time) + avg_inf_time = sum(inference_time) / len(inference_time) / batchsize * 1000 + print('性能(毫秒):', avg_inf_time) + print("throughput(fps): ", 1000 / avg_inf_time) + + return pred_results + +def pt_infer(model, input_li, device_id, loop_num, inference_time): + input_npu_li = input_li.to("npu:" + str(device_id)) + stream = torch_aie.npu.Stream("npu:" + str(device_id)) + with torch_aie.npu.stream(stream): + inf_start = time.time() + output_npu = model.forward(input_npu_li) + stream.synchronize() + inf_end = time.time() + inf = inf_end - inf_start + if loop_num >= 5: # use 5 step to warmup + inference_time.append(inf) + results = output_npu[0].to("cpu") + return results, inference_time \ No newline at end of file diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py new file mode 100644 index 000000000..ac0574b7a --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py @@ -0,0 +1,94 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import yaml +import json +import cv2 +import argparse +import numpy as np +import torch +import torch_aie +from torch_aie import _enums +from glob import glob +from ECAPA_TDNN.prepare_batch_loader import struct_meta, write_to_csv, read_from_csv, reduce_meta, build_speaker_dict, collate_function +from torch.utils.data import DataLoader +from functools import partial +from model_pt import forward_nms_script + + +def load_meta(dataset, keyword='vox1'): + if keyword == 'vox1': + wav_files_test = sorted(glob(dataset + '/*/*/*.wav')) + print(f'Len. 
wav_files_test {len(wav_files_test)}') + test_meta = struct_meta(wav_files_test) + return test_meta + +def get_dataloader(keyword='vox1', t_thres=19, batchsize = 16, dataset = "VoxCeleb1"): + test_meta = load_meta(dataset, keyword) + test_meta_ = [meta for meta in (test_meta) if meta[2] < t_thres] + test_meta = reduce_meta(test_meta_, speaker_num=-1) + print(f'Meta reduced {len(test_meta_)} => {len(test_meta)}') + test_speakers = build_speaker_dict(test_meta) + dataset_test = DataLoader(test_meta, batch_size=batchsize, + shuffle=False, num_workers=1, + collate_fn=partial(collate_function, + speaker_table=test_speakers, + # max_mel_length=MAX_MEL_LENGTH), + max_mel_length=10), + drop_last=True) + return dataset_test, test_speakers + +def main(opt): + # load model + model = torch.jit.load(opt.model) + torch_aie.set_device(opt.device_id) + if opt.need_compile: + inputs = [] + inputs.append(torch_aie.Input((opt.batch_size, 80, 10))) + model = torch_aie.compile( + model, + inputs=inputs, + precision_policy=_enums.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_compilation=False, + allow_tensor_replace_int=False, + min_block_size=3, + torch_executed_ops=[], + soc_version=opt.soc_version, + optimization_level=0) + + # load dataset + dataloader, test_speakers = get_dataloader('vox1', 19, opt.batch_size, dataset=opt.data_path) + # inference & nms + pred_results = forward_nms_script(model, dataloader, opt.batch_size, opt.device_id) + output_folder = f"result/output_bs{opt.batch_size}" + if not os.path.exists(output_folder): + os.makedirs(output_folder) + for index, res in enumerate(pred_results): + # print("res", res) + for i, r in enumerate(res): + result_fname = 'mels' + str(index * opt.batch_size + i + 1) + '_0.bin' + np.array(r.numpy().tofile(os.path.join(output_folder, result_fname))) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='YOLOv3 offline model inference.') + parser.add_argument('--data_path', type=str, default="VoxCeleb1", help='root dir for val images and annotations') + parser.add_argument('--soc_version', type=str, default='Ascend310P3', help='soc version') + parser.add_argument('--model', type=str, default="ecapa_tdnn_torch_aie_bs1.pt", help='ts model path') + parser.add_argument('--need_compile', action="store_true", help='if the loaded model needs to be compiled or not') + parser.add_argument('--batch_size', type=int, default=1, help='batch size') + parser.add_argument('--device_id', type=int, default=0, help='device id') + opt = parser.parse_args() + main(opt) diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2onnx.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2onnx.py new file mode 100644 index 000000000..277863052 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2onnx.py @@ -0,0 +1,53 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
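+
+# pytorch2onnx.py: despite the name, this version traces the checkpoint to
+# TorchScript and saves ecapa_tdnn.torchscript.pt; the ONNX export call is
+# left commented out below.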
+
+import os
+import torch
+
+import sys
+
+from ECAPA_TDNN.main import ECAPA_TDNN, load_checkpoint
+from torch import optim
+from functools import partial
+
+
+def pth2onnx(checkpoint, output_file):
+    device = torch.device('cpu')
+    model = ECAPA_TDNN(1211, device).to(device)
+
+    optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=2e-5)
+
+    model, optimizer, step = load_checkpoint(model, optimizer, checkpoint, rank='cpu')
+
+    model.forward = partial(model.forward, infer=True)
+    # switch the model to eval mode
+    model.eval()
+    # input node names
+    # input_names = ["mel","speaker"]
+    input_names = ["mel"]
+    # output node names
+    output_names = ["output1", "output2"]
+    dynamic_axes = {'mel': {0: '-1'}, 'output1': {0: '-1'}, 'output2': {0: '-1'}}
+    dummy_input1 = torch.randn(1, 80, 200).to(device)
+
+    # torch.onnx.export(model, dummy_input1, output_file, input_names=input_names, dynamic_axes=dynamic_axes,output_names=output_names, opset_version=12, verbose=True)
+    ts_model = torch.jit.trace(model, dummy_input1)
+    output_file = 'ecapa_tdnn.torchscript.pt'
+    ts_model.save(output_file)
+
+
+if __name__ == "__main__":
+    checkpoint = sys.argv[1]
+    save_path = sys.argv[2]
+    pth2onnx(checkpoint, save_path)

From fd24a9e3f613753d9ee2d4d82617b5018e9cae54 Mon Sep 17 00:00:00 2001
From: han_yifeng
Date: Tue, 28 Nov 2023 16:35:33 +0800
Subject: [PATCH 2/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8BREADME?=
 =?UTF-8?q?=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
index 820314ee0..b01195f24 100644
--- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
+++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
@@ -52,8 +52,7 @@ ECAPA-TDNN builds on recent trends in face verification and computer vision,
 
   | Dependency | Version | Setup guide |
   |--------| ------- | ----------------------------------------------------------------------------------------------------- |
-  | Firmware & driver | 23.0.0 | [PyTorch inference environment setup](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/pies) |
-  | CANN | 7.0.0 | - |
+  | CANN | 7.1.T5.1.B113:7.0.0 | - |
   | Python | 3.9.0 | - |
   | PyTorch | 2.0.1 | - |
   | Note: chip type is Ascend310P3 | \ | \ |

From 575f4095e8e37603b0094b56360ea2df07f1aca0 Mon Sep 17 00:00:00 2001
From: han_yifeng
Date: Mon, 4 Dec 2023 17:06:33 +0800
Subject: [PATCH 3/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8B=E6=B5=8B?=
 =?UTF-8?q?=E8=AF=95=E8=84=9A=E6=9C=AC=E4=B8=8EREADME=E4=BF=AE=E6=94=B9=20?=
 =?UTF-8?q?FastPitch=E8=84=9A=E6=9C=AC=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/cv/audio/Ecapa_Tdnn/README.md    | 26 +++++++++----------
 .../audio/Ecapa_Tdnn/export_torch_aie_ts.py   |  3 +--
 .../built-in/cv/audio/Ecapa_Tdnn/model_pt.py  | 16 +++---------
 .../built-in/cv/audio/Ecapa_Tdnn/pt_val.py    | 10 ++-----
 .../{pytorch2onnx.py => pytorch2ts.py}        | 12 ---------
 .../built-in/cv/audio/FastPitch/pth2ts.py     |  3 +--
 6 files changed, 21 insertions(+), 49 deletions(-)
 rename AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/{pytorch2onnx.py => pytorch2ts.py} (74%)

diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
index b01195f24..c8d625352 100644
--- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
+++ 
b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
@@ -116,8 +116,8 @@ ECAPA-TDNN builds on recent trends in face verification and computer vision,
 
 2. Generate the traced model (ts)
    ```
-   Replace the file of the same name in the original project with the pytorch2onnx.py from this README's directory
-   python3 pytorch2onnx.py checkpoint ecapa_tdnn.onnx
+   Put pytorch2ts.py in the same directory as pytorch2onnx.py
+   python3 pytorch2ts.py checkpoint ecapa_tdnn.torchscript.pt
    ```
 
 3. Save the compiled, optimized model (optional: the inference script below performs compilation itself)
@@ -136,7 +136,7 @@ ECAPA-TDNN builds on recent trends in face verification and computer vision,
 
 4. Run the inference script (includes performance measurement)
 
-   Put pt_val.py under ./yolov3 and model_pt.py under ./yolov3/common/util
+   Put pt_val.py and model_pt.py under Ecapa_Tdnn
    ```
    python pt_val.py --batch_size=64 --model="ecapa_tdnn_torch_aie_bs64.pt"
    ```
@@ -159,17 +159,17 @@ ECAPA-TDNN builds on recent trends in face verification and computer vision,
 ```
   --result/output_bs1: path to the inference results
  
  --speaker: path to the label data
-  --4: batch size
-  --4648: total sample count
+  --1 (set in the script): batch size
+  --4648 (set in the script): total sample count
 ```
 
 **Table 2** ecapa_tdnn accuracy
 
-| batchsize | AIE perf | AIE accuracy |
-|------------------------------------------------|-----------|--------|
-| bs1 | 894.4216 | 0.9856 |
-| bs4 | 2674.6597 | 0.9865 |
-| bs8 | 3686.8627 | / |
-| bs16 | 692.3013 | / |
-| bs32 | 1358.1562 | / |
-| bs64 | 2645.4167 | / |
+| batchsize | AIE perf | AIE accuracy |
+|------------------------------------------------|----------|---------|
+| bs1 | 449.1879 | 0.99905 |
+| bs4 | 877.4901 | 0.99909 |
+| bs8 | 904.0024 | / |
+| bs16 | 881.0279 | / |
+| bs32 | 863.7933 | / |
+| bs64 | 774.4264 | / |
diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
index 535e47a3e..d585926e8 100644
--- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
+++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
@@ -25,12 +25,11 @@ def export_torch_aie(opt_args):
 
     torch_aie.set_device(0)
     inputs = []
-    inputs.append(torch_aie.Input((opt_args.batch_size, 80, 10)))
+    inputs.append(torch_aie.Input((opt_args.batch_size, 80, 200)))
     torchaie_model = torch_aie.compile(
         trace_model,
         inputs=inputs,
         precision_policy=_enums.PrecisionPolicy.FP16,
-        # precision_policy=_enums.PrecisionPolicy.PREF_FP32,
         truncate_long_and_double=True,
         require_full_compilation=False,
         allow_tensor_replace_int=False,
diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py
index f54171792..b1fd41009 100644
--- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py
+++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py
@@ -14,30 +14,22 @@
 
 import torch
 import torch_aie
-import numpy as np
 import time
-import os
-import cv2
 from tqdm import tqdm
-from torch_aie import _enums
-
-from pathlib import Path
-
 
 def forward_nms_script(model, dataloader, batchsize, device_id):
     pred_results = []
     inference_time = []
     loop_num = 0
-    for img, _, _ in tqdm(dataloader):
-        img = img.contiguous()
+    for snd, _, _ in tqdm(dataloader):
+        snd = snd.contiguous()
 
         # pt infer
-        result, inference_time = pt_infer(model, img, device_id, loop_num, inference_time)
+        result, inference_time = pt_infer(model, snd, device_id, loop_num, inference_time)
         pred_results.append(result)
         loop_num += 1
-    # print(batchsize, inference_time)
     avg_inf_time = sum(inference_time) / len(inference_time) / batchsize * 1000
-    print('性能(毫秒):', avg_inf_time)
+    print('performance(ms):', avg_inf_time)
     print("throughput(fps): ", 1000 / avg_inf_time)
 
     return pred_results
diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py
index ac0574b7a..3cafd2f3f 100644
--- 
a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py @@ -13,9 +13,6 @@ # limitations under the License. import os -import yaml -import json -import cv2 import argparse import numpy as np import torch @@ -45,8 +42,7 @@ def get_dataloader(keyword='vox1', t_thres=19, batchsize = 16, dataset = "VoxCel shuffle=False, num_workers=1, collate_fn=partial(collate_function, speaker_table=test_speakers, - # max_mel_length=MAX_MEL_LENGTH), - max_mel_length=10), + max_mel_length=200), drop_last=True) return dataset_test, test_speakers @@ -56,7 +52,7 @@ def main(opt): torch_aie.set_device(opt.device_id) if opt.need_compile: inputs = [] - inputs.append(torch_aie.Input((opt.batch_size, 80, 10))) + inputs.append(torch_aie.Input((opt.batch_size, 80, 200))) model = torch_aie.compile( model, inputs=inputs, @@ -77,11 +73,9 @@ def main(opt): if not os.path.exists(output_folder): os.makedirs(output_folder) for index, res in enumerate(pred_results): - # print("res", res) for i, r in enumerate(res): result_fname = 'mels' + str(index * opt.batch_size + i + 1) + '_0.bin' np.array(r.numpy().tofile(os.path.join(output_folder, result_fname))) - if __name__ == '__main__': parser = argparse.ArgumentParser(description='YOLOv3 offline model inference.') parser.add_argument('--data_path', type=str, default="VoxCeleb1", help='root dir for val images and annotations') diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2onnx.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py similarity index 74% rename from AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2onnx.py rename to AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py index 277863052..d14ce7a0c 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2onnx.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os
 import torch
 
 import sys
 
@@ -25,25 +24,14 @@ def pth2onnx(checkpoint, output_file):
     device = torch.device('cpu')
     model = ECAPA_TDNN(1211, device).to(device)
-
-    optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=2e-5)
-
-    model, optimizer, step = load_checkpoint(model, optimizer, checkpoint, rank='cpu')
-
+    optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=2e-5)
+    model, optimizer, step = load_checkpoint(model, optimizer, checkpoint, rank='cpu')
     model.forward = partial(model.forward, infer=True)
     # switch the model to eval mode
     model.eval()
-    # input node names
-    # input_names = ["mel","speaker"]
-    input_names = ["mel"]
-    # output node names
-    output_names = ["output1", "output2"]
-    dynamic_axes = {'mel': {0: '-1'}, 'output1': {0: '-1'}, 'output2': {0: '-1'}}
     dummy_input1 = torch.randn(1, 80, 200).to(device)
 
-    # torch.onnx.export(model, dummy_input1, output_file, input_names=input_names, dynamic_axes=dynamic_axes,output_names=output_names, opset_version=12, verbose=True)
     ts_model = torch.jit.trace(model, dummy_input1)
-    output_file = 'ecapa_tdnn.torchscript.pt'
     ts_model.save(output_file)
diff --git a/AscendIE/TorchAIE/built-in/cv/audio/FastPitch/pth2ts.py b/AscendIE/TorchAIE/built-in/cv/audio/FastPitch/pth2ts.py
index 019413ee4..e2ec6a225 100644
--- a/AscendIE/TorchAIE/built-in/cv/audio/FastPitch/pth2ts.py
+++ b/AscendIE/TorchAIE/built-in/cv/audio/FastPitch/pth2ts.py
@@ -184,9 +184,8 @@ def main():
         generator = torch.jit.script(generator)
     else:
         generator = None
-    bs = args.batch_size
 
-    text_padded = torch.LongTensor(bs, 200)
+    text_padded = torch.LongTensor(1, 200)
     text_padded.zero_()
     pth2ts(model=generator, dummy_input=text_padded, output_file=f"fastpitch.torchscript.pt")
 

From 2c2055184cd965b49aaaaf71f264b67edeba34dc Mon Sep 17 00:00:00 2001
From: han_yifeng
Date: Mon, 4 Dec 2023 17:28:58 +0800
Subject: [PATCH 4/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8B=E6=B5=8B?=
 =?UTF-8?q?=E8=AF=95=E8=84=9A=E6=9C=AC=E4=B8=8EREADME=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../built-in/cv/audio/Ecapa_Tdnn/README.md       | 16 ++++++++--------
 .../cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py   |  1 +
 .../built-in/cv/audio/Ecapa_Tdnn/model_pt.py     |  2 +-
 .../built-in/cv/audio/Ecapa_Tdnn/pt_val.py       |  2 +-
 .../built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py   |  2 +-
 5 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
index c8d625352..8f31483c9 100644
--- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
+++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/README.md
@@ -165,11 +165,11 @@ ECAPA-TDNN builds on recent trends in face verification and computer vision,
 
 **Table 2** ecapa_tdnn accuracy
 
-| batchsize | AIE perf | AIE accuracy |
-|------------------------------------------------|----------|---------|
-| bs1 | 449.1879 | 0.99905 |
-| bs4 | 877.4901 | 0.99909 |
-| bs8 | 904.0024 | / |
-| bs16 | 881.0279 | / |
-| bs32 | 863.7933 | / |
-| bs64 | 774.4264 | / |
+| batchsize | AIE perf (fps) | AIE accuracy |
+|------------------------------------------------|------------|---------|
+| bs1 | 449.1879 | 0.99905 |
+| bs4 | 877.4901 | 0.99909 |
+| bs8 | 904.0024 | / |
+| bs16 | 881.0279 | / |
+| bs32 | 863.7933 | / |
+| bs64 | 774.4264 | / |
diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
index d585926e8..181213635 100644
--- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
+++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py
@@ -19,6 +19,7 @@ import torch_aie
 from torch_aie import _enums
 
+
 def 
export_torch_aie(opt_args): trace_model = torch.jit.load(opt_args.torch_script_path) trace_model.eval() diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py index b1fd41009..3cffc865b 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py @@ -1,4 +1,4 @@ -# Copyright 2022 Huawei Technologies Co., Ltd +# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py index 3cafd2f3f..78c2970c1 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py @@ -1,4 +1,4 @@ -# Copyright 2022 Huawei Technologies Co., Ltd +# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py index d14ce7a0c..c0663fd99 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py @@ -1,4 +1,4 @@ -# Copyright 2022 Huawei Technologies Co., Ltd +# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From 36d8949b49014ee81e90f7dacc31e9e64fb59810 Mon Sep 17 00:00:00 2001 From: han_yifeng Date: Mon, 4 Dec 2023 17:34:14 +0800 Subject: [PATCH 5/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8B=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E8=84=9A=E6=9C=AC=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py index c0663fd99..a6a7e9644 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py @@ -21,7 +21,7 @@ from functools import partial -def pth2onnx(checkpoint, output_file): +def pth2ts(checkpoint, output_file): device = torch.device('cpu') model = ECAPA_TDNN(1211, device).to(device) optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=2e-5) @@ -38,4 +38,4 @@ def pth2onnx(checkpoint, output_file): if __name__ == "__main__": checkpoint = sys.argv[1] save_path = sys.argv[2] - pth2onnx(checkpoint, save_path) + pth2ts(checkpoint, save_path) From 22db1cd01f2bafc519d86af7cda47df323c5d6ff Mon Sep 17 00:00:00 2001 From: han_yifeng Date: Mon, 4 Dec 2023 17:36:12 +0800 Subject: [PATCH 6/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8B=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E8=84=9A=E6=9C=AC=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py index 3cffc865b..482e2ca62 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/model_pt.py @@ -46,4 +46,4 @@ def pt_infer(model, input_li, device_id, loop_num, inference_time): if loop_num >= 5: # use 5 step to warmup inference_time.append(inf) results = output_npu[0].to("cpu") - return results, inference_time \ No newline at end of file + return results, inference_time From b1a05ac8aef1d6638503a35c97ff66468193b2bd Mon Sep 17 00:00:00 2001 From: han_yifeng Date: Mon, 4 Dec 2023 17:40:52 +0800 Subject: [PATCH 7/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8B=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E8=84=9A=E6=9C=AC=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py index 181213635..03f77ab44 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/export_torch_aie_ts.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import os import argparse import torch import torch_aie + from torch_aie import _enums From ca908fa2635574781c280949aacf5a711b07d5b2 Mon Sep 17 00:00:00 2001 From: han_yifeng Date: Mon, 4 Dec 2023 17:48:53 +0800 Subject: [PATCH 8/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8B=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E8=84=9A=E6=9C=AC=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py index 78c2970c1..c3b5212fb 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py @@ -19,7 +19,7 @@ import torch_aie from torch_aie import _enums from glob import glob -from ECAPA_TDNN.prepare_batch_loader import struct_meta, write_to_csv, read_from_csv, reduce_meta, build_speaker_dict, collate_function +from ECAPA_TDNN.prepare_batch_loader import struct_meta, reduce_meta, build_speaker_dict, collate_function from torch.utils.data import DataLoader from functools import partial from model_pt import forward_nms_script From c322777ae0d562a6849439f51394692be1382870 Mon Sep 17 00:00:00 2001 From: han_yifeng Date: Mon, 4 Dec 2023 17:53:16 +0800 Subject: [PATCH 9/9] =?UTF-8?q?ECAPA=5FTDNN=E6=A8=A1=E5=9E=8B=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E8=84=9A=E6=9C=AC=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py | 4 ++-- AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py index c3b5212fb..11a187e91 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pt_val.py @@ -32,7 +32,7 @@ def load_meta(dataset, keyword='vox1'): test_meta = struct_meta(wav_files_test) return test_meta -def get_dataloader(keyword='vox1', t_thres=19, batchsize = 16, dataset = "VoxCeleb1"): +def get_dataloader(keyword='vox1', t_thres=19, batchsize=16, dataset="VoxCeleb1"): test_meta = load_meta(dataset, keyword) test_meta_ = [meta for meta in (test_meta) if meta[2] < t_thres] test_meta = reduce_meta(test_meta_, speaker_num=-1) @@ -66,7 +66,7 @@ def main(opt): optimization_level=0) # load dataset - dataloader, test_speakers = get_dataloader('vox1', 19, opt.batch_size, dataset=opt.data_path) + dataloader, _ = get_dataloader('vox1', 19, opt.batch_size, dataset=opt.data_path) # inference & nms pred_results = forward_nms_script(model, dataloader, opt.batch_size, opt.device_id) output_folder = f"result/output_bs{opt.batch_size}" diff --git a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py index a6a7e9644..b631e2c45 100644 --- a/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py +++ b/AscendIE/TorchAIE/built-in/cv/audio/Ecapa_Tdnn/pytorch2ts.py @@ -20,10 +20,10 @@ from torch import optim from functools import partial - +NUM_SPEAKERS = 1211 def pth2ts(checkpoint, output_file): device = torch.device('cpu') - model = ECAPA_TDNN(1211, device).to(device) + model = ECAPA_TDNN(NUM_SPEAKERS, device).to(device) optimizer = 
optim.Adam(model.parameters(), lr=1e-5, weight_decay=2e-5) model, optimizer, step = load_checkpoint(model, optimizer, checkpoint, rank='cpu') model.forward = partial(model.forward, infer=True)
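
For a quick end-to-end sanity check once the whole series is applied, a smoke test along the following lines can be used. It is a sketch only: the module file name assumes the bs1 output of export_torch_aie_ts.py, and the tensor shapes follow the README's input/output tables (input batchsize x 80 x 200; outputs batchsize x 192 and batchsize x 200 x 1536).

```python
import torch
import torch_aie

# Bind to NPU 0, as the scripts in this series do.
torch_aie.set_device(0)

# Compiled module written by export_torch_aie_ts.py (assumed bs1 file name).
model = torch.jit.load("ecapa_tdnn_torch_aie_bs1.pt")
model.eval()

# Random mel-spectrogram batch matching the README input spec: 1 x 80 x 200.
dummy = torch.randn(1, 80, 200).to("npu:0")

# Assumes the model returns the two outputs listed in the README:
# a speaker embedding (1 x 192) and frame-level features (1 x 200 x 1536).
embedding, frames = model(dummy)
print(embedding.shape)  # torch.Size([1, 192])
print(frames.shape)     # torch.Size([1, 200, 1536])
```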