add files

kdrkdrkdr · Jun 28, 2023 · 0fe259e · 0fe259e
1 parent 84ee959
commit 0fe259e
Show file tree

Hide file tree

Showing 11 changed files with 124 additions and 97 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,4 @@
 *__pycache__*
-dataset/
+dataset/*
 logs/
 test.py
diff --git a/README.md b/README.md
@@ -1,2 +1,104 @@
 # JK-VITS
 Bilingual-TTS (Japanese and Korean)
+This repository provides a TTS model that can speak Japanese even if it is trained with a Korean dataset, and can speak Korean even if it is trained with a Japanese dataset.
+By transcribing pronunciation from Japanese to Korean and from Korean to Japanese, it improves on the unstable voice produced when using the existing multilingual IPA cleaners.
+
+
+
+## Table of Contents 
+- [Pre-requisites](#pre-requisites)
+- [Installation](#installation)
+- [Preparing Dataset Example](#preparing-dataset-example)
+- [Usage](#usage)
+- [Inference](#inference)
+- [References](#references)
+
+
+## Pre-requisites
+- A Windows/Linux system with a minimum of `16GB` RAM.
+- A GPU with at least `12GB` of VRAM.
+- Python >= 3.8
+- Anaconda installed.
+- PyTorch installed.
+- CUDA 11.7 installed.
+
+
+
+Pytorch install command:
+```sh
+pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117
+```
+CUDA 11.7 Install:
+`https://developer.nvidia.com/cuda-11-7-0-download-archive`
+cuDNN (for CUDA 11.x) Install:
+`https://developer.nvidia.com/rdp/cudnn-archive`
+
+
+---
+## Installation 
+1. **Create an Anaconda environment:**
+
+```sh
+conda create -n jk-vits python=3.8
+```
+
+2. **Activate the environment:**
+
+```sh
+conda activate jk-vits
+```
+
+3. **Clone this repository to your local machine:**
+
+```sh
+git clone https://github.com/kdrkdrkdr/JK-VITS.git
+```
+
+4. **Navigate to the cloned directory:**
+
+```sh
+cd JK-VITS
+```
+
+5. **Install the necessary dependencies:**
+
+```sh
+pip install -r requirements.txt
+pip install -U pyopenjtalk==0.2.0 --no-build-isolation
+```
+---
+
+## Preparing Dataset Example
+
+- Place the audio files as follows. 
+.wav files are okay. The sample rate of the audio must be 44100 Hz.
+
+
+- Run preprocessing (g2p) on your own dataset to convert the text in your filelists into phonemes.
+```sh
+python preprocess.py --filelists filelists/train.txt filelists/val.txt
+```
+
+- Set configs.
+If you train with a Japanese dataset, refer to [configs/ja.json](configs/ja.json).
+If you train with a Korean dataset, refer to [configs/ko.json](configs/ko.json).
+---
+
+## Training Example
+```sh
+python train.py -c configs/ft.json -m ft
+```
+
+
+---
+## Inference Example
+See [inference.ipynb](inference.ipynb)
+
+
+
+---
+## References
+For more information, please refer to the following repositories: 
+- [jaywalnut310/vits](https://github.com/jaywalnut310/vits.git)
+- [MasayaKawamura/MB-iSTFT-VITS](https://github.com/MasayaKawamura/MB-iSTFT-VITS)
+- [Kyubyong/g2pK](https://github.com/Kyubyong/g2pK)
diff --git a/configs/ja.json b/configs/ja.json
@@ -21,6 +21,8 @@
       "window": "hann_window"  
     },
     "data": {
+      "is_japanese_dataset":true,
+      "is_korean_dataset":false,
       "training_files":"filelists/ja_train.txt.cleaned",
       "validation_files":"filelists/ja_val.txt.cleaned",
       "text_cleaners":["jk_cleaners"],

diff --git a/configs/ko.json b/configs/ko.json
@@ -21,6 +21,8 @@
     "window": "hann_window"  
   },
   "data": {
+    "is_japanese_dataset":false,
+    "is_korean_dataset":true,
     "training_files":"filelists/ko_train.txt.cleaned",
     "validation_files":"filelists/ko_val.txt.cleaned",
     "text_cleaners":["jk_cleaners"],

diff --git a/configs/mari.json b/configs/mari.json
diff --git a/filelists/train.txt b/filelists/train.txt
diff --git a/filelists/val.txt b/filelists/val.txt
diff --git a/inference.ipynb b/inference.ipynb
diff --git a/preprocess.py b/preprocess.py
@@ -6,7 +6,7 @@
   parser = argparse.ArgumentParser()
   parser.add_argument("--out_extension", default="cleaned")
   parser.add_argument("--text_index", default=1, type=int)
-  parser.add_argument("--filelists", nargs="+", default=["filelists/mari_train.txt", "filelists/mari_val.txt"])
+  parser.add_argument("--filelists", nargs="+", default=["filelists/train.txt", "filelists/val.txt"])
   parser.add_argument("--text_cleaners", nargs="+", default=["jk_cleaners"])
 
   args = parser.parse_args()

diff --git a/requirements.txt b/requirements.txt
@@ -1,31 +1,17 @@
-# utils
 cmake
 ffmpeg
-
-# torch
---extra-index-url https://download.pytorch.org/whl/cu117
-torch==1.13.1+cu117
-torchvision==0.14.1+cu117
-torchaudio==0.13.1
-
-# vits
-Cython==0.29.21
-librosa==0.8.0
 matplotlib==3.3.1
-numpy==1.18.5
-scipy==1.5.2
 tensorboard==2.3.0
 Unidecode==1.1.1
 pysoundfile==0.9.0.post1
 monotonic-align
 g2pk2
-eunjeon
 ko_pron==1.3
 jamo==0.4.1
-pyopenjtalk==0.2.0
+# pyopenjtalk==0.2.0
+jaconv
 protobuf==3.19.0
-
-# # Nuwave2
-# prefetch_generator
-# omegaconf==2.0.6
-# pytorch_lightning==1.2.10
+Cython==0.29.21
+librosa==0.8.0
+numpy==1.18.5
+scipy==1.5.2
diff --git a/train_latest.py → train.py b/train_latest.py → train.py