diff --git a/egs/librilight/SSL/local/extract_kmeans.py b/egs/librilight/SSL/local/extract_kmeans.py index b8f6fcba9e..9b101a4822 100755 --- a/egs/librilight/SSL/local/extract_kmeans.py +++ b/egs/librilight/SSL/local/extract_kmeans.py @@ -18,6 +18,7 @@ import argparse import logging import math +import os from pathlib import Path from typing import Optional @@ -36,6 +37,8 @@ torch.set_num_threads(1) torch.set_num_interop_threads(1) +os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512" + class ApplyKmeans(object): def __init__(self, km_path): diff --git a/egs/librilight/SSL/prepare.sh b/egs/librilight/SSL/prepare.sh index e0a293b2c9..160c66c7da 100755 --- a/egs/librilight/SSL/prepare.sh +++ b/egs/librilight/SSL/prepare.sh @@ -64,7 +64,7 @@ fi if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then log "Stage 3: Split medium and large subset into pieces" - num_per_split=2500 + num_per_split=10000 split_dir=data/kmeans/medium_split if [ ! -f $split_dir/.split_completed ]; then lhotse split-lazy ./data/kmeans/librilight_cuts_medium_raw.jsonl.gz $split_dir $num_per_split