mirror of https://github.com/k2-fsa/icefall.git, synced 2025-09-06 15:44:17 +00:00

comments about disk usage and training example script

This commit is contained in:
parent c026a97d41
commit 15220797e3
@@ -1,4 +1,4 @@
-stage=4
+stage=3
 
 # Parameters about model.
 exp_dir=./vq_pruned_transducer_stateless2/exp/
@@ -7,11 +7,15 @@ hubert_model_dir=${exp_dir}/hubert_models
 hubert_model=${hubert_model_dir}/${model_id}.pt
 
 # Parameters about quantizer.
+memory_layer=36  # 1-based
 
 
+# Make sure the following parameters are identical to those in hubert_utils.vq_config.
 num_utts=1000
-mem_layer=36
 bytes_per_frame=8
 enable_refine=True
 
 
 if [ $stage -eq -1 ]; then
     # Preparation stage.
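
The comment added above asks that num_utts, bytes_per_frame and enable_refine stay in sync with hubert_utils.vq_config. A quick way to eyeball that, assuming the dict lives in ./vq_pruned_transducer_stateless2/hubert_utils.py (a sketch, not part of this commit):

    # print the quantizer-related entries of vq_config for manual comparison
    grep -E '"(num_utts|bytes_per_frame|refine_iter)"' \
        ./vq_pruned_transducer_stateless2/hubert_utils.py
    # compare against num_utts=1000, bytes_per_frame=8, enable_refine=True above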
@@ -27,6 +31,10 @@ if [ $stage -eq -1 ]; then
     wget -c https://dl.fbaipublicfiles.com/fairseq/wav2vec/dict.ltr.txt -P ${hubert_model_dir}
 fi
 
+if [ ! -d ./data/fbank ]; then
+    echo "This script assumes ./data/fbank is already generated by prepare.sh"
+    exit 0
+fi
 
 if [ $stage -eq 0 ]; then
     # This stage is not directly used by codebook extraction.
@@ -36,17 +44,17 @@ if [ $stage -eq 0 ]; then
     # [test-clean-ctc_greedy_search] %WER 2.04% [1075 / 52576, 92 ins, 104 del, 879 sub ]
     # [test-other-ctc_greedy_search] %WER 3.71% [1942 / 52343, 152 ins, 126 del, 1664 sub ]
     export CUDA_VISIBLE_DEVICES=7
-    ./vq_pruned_transducer_stateless2/hubert_decode.py \
-        --max-duration 10
+    ./vq_pruned_transducer_stateless2/hubert_decode.py
 fi
 
 if [ $stage -eq 1 ]; then
     ./vq_pruned_transducer_stateless2/hubert_memory_embeddings.py \
-        --max-duration 10
+        --memory-layer=${memory_layer}
 fi
 
 if [ $stage -eq 2 ]; then
-    ./vq_pruned_transducer_stateless2/quantizer_train.py
+    ./vq_pruned_transducer_stateless2/quantizer_train.py \
+        --memory-layer=${memory_layer}
 fi
 
 # CAUTION: set quantizer_id MANUALLY when a new quantizer is used.
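
Stages 1 and 2 must now see the same 1-based layer index; a minimal sketch of running them back to back with the flag pinned once (paths and flags exactly as in the diff above):

    memory_layer=36  # 1-based, set once at the top of the script
    # stage 1: dump teacher embeddings from the chosen layer
    ./vq_pruned_transducer_stateless2/hubert_memory_embeddings.py \
        --memory-layer=${memory_layer}
    # stage 2: train the quantizer on those embeddings
    ./vq_pruned_transducer_stateless2/quantizer_train.py \
        --memory-layer=${memory_layer}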
@@ -75,12 +83,22 @@ if [ $stage -eq 4 ]; then
     refine_iter=5
 
     extract_codebook_index(){
+        # Analysis of disk usage:
+        # With bytes_per_frame=8, each embedding is compressed into eight 8-bit integers, i.e. 8 bytes per frame.
+        # The training set, clean-100h with speed perturbation at 0.9 and 1.1, totals 300 hours.
+        # The output frame rate of HuBERT is 50 frames per second.
+        # Theoretically, 412M = 300 * 3600 * 50 * 8 / 1024 / 1024 is needed.
+        # The actual size of all "*.h5" files storing codebook indexes is 450M.
+        # I think the extra ~38M is metadata.
+        #
+        # About CUDA_VISIBLE_DEVICES:
         # When I tested this code, GPUs 6 and 7 were available,
         # so CUDA_VISIBLE_DEVICES is (1 + 5) for job 0
         # and (2 + 5) for job 1.
         # Note: the order of split manifests is 1-based, while GPU numbering is 0-based.
         export CUDA_VISIBLE_DEVICES=`(expr $1 + 5)`
         ./vq_pruned_transducer_stateless2/hubert_code_indices.py \
+            --memory-layer=${memory_layer} \
             --num-splits $num_jobs \
             --subset=$2 \
             --manifest-idx $1 \
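
The arithmetic in the new disk-usage comment and the manifest-to-GPU mapping can both be checked in a couple of lines of shell (all numbers taken from the comments above):

    # 300 hours * 3600 s/h * 50 frames/s * 8 bytes/frame, converted to MiB
    echo $((300 * 3600 * 50 * 8 / 1024 / 1024))    # -> 411, i.e. the ~412M above
    # 1-based manifest index -> 0-based GPU id, as in `expr $1 + 5`
    for idx in 1 2; do echo "manifest $idx -> gpu $((idx + 5))"; done    # gpu 6, gpu 7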
@@ -98,11 +116,36 @@ if [ $stage -eq 4 ]; then
     done
     wait
 fi
 
+cdidx_manifests_dir=`pwd`/data/globalrandom-scaledquantizer-refine_iter-5-${num_utts}-$model_id-${memory_layer}layer-${quantizer_id}-bytes_per_frame-${bytes_per_frame}-enable-refine-True
 if [ $stage -eq 5 ]; then
-    for subset in ${train_subset}; do
-        cdidx_manifests_dir=`pwd`/data/$model_id-${mem_layer}layer-${quantizer_id}-bytes_per_frame-${bytes_per_frame}
+    for subset in ${train_subsets}; do
         combined_list=`find $cdidx_manifests_dir/splits$num_jobs/ -name cuts_train-${subset}*`
         echo $combined_list
         lhotse combine $combined_list $cdidx_manifests_dir/cuts_train-${subset}.json.gz
     done
+
+    reusable_subsets="dev-clean dev-other test-clean test-other musan"
+    for subset in $reusable_subsets; do
+        ori_manifest=./data/fbank/cuts_${subset}.json.gz
+        ln -sf `realpath ${ori_manifest}` ${cdidx_manifests_dir}
+    done
+fi
+
+if [ $stage -eq 6 ]; then
+    # Example training script.
+    # Note: it's better to set --spec-aug-time-warp-factor=-1.
+    export CUDA_VISIBLE_DEVICES="4,5,6"
+    WORLD_SIZE=3
+    python3 ./vq_pruned_transducer_stateless2/train.py \
+        --num-codebooks=${bytes_per_frame} \
+        --start-epoch 0 \
+        --master-port 12358 \
+        --manifest-dir ${cdidx_manifests_dir} \
+        --full-libri 0 \
+        --spec-aug-time-warp-factor -1 \
+        --max-duration 300 \
+        --world-size ${WORLD_SIZE} \
+        --num-epochs 30 \
+        --codebook-loss-scale 0.1
 fi
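
After stage 5, ${cdidx_manifests_dir} holds the combined train manifests next to symlinks for the reusable subsets, so stage 6 can point --manifest-dir at a single directory. A sketch of the expected layout (the train subset name is illustrative, not taken from this commit):

    ls ${cdidx_manifests_dir}
    # cuts_train-clean-100.json.gz             <- written by `lhotse combine`
    # cuts_dev-clean.json.gz                   <- symlink into ./data/fbank
    # cuts_dev-other.json.gz, cuts_test-clean.json.gz,
    # cuts_test-other.json.gz, cuts_musan.json.gz   <- one per reusable subset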
@@ -31,17 +31,16 @@ from fairseq.models.hubert.hubert import HubertModel
 from omegaconf import OmegaConf
 
 vq_config = {
+    # TODO: Maybe better to convert this class to yaml driven config.
     # parameters about hubert model inference.
     "model_dir": "./vq_pruned_transducer_stateless2/exp/hubert_models/",
-    "model_id": "hubert_xtralarge_ll60k_finetune_ls960",
     "input_strategy": "AudioSamples",
     "enable_spec_aug": False,
     "enable_musan": False,
     "total_layers": 48,
     "memory_embedding_dim": 1280,
     # parameters about quantizer.
-    "num_utts": 100,
-    "memory_layer": 36,
+    "num_utts": 1000,
     "memory_dir": "./vq_pruned_transducer_stateless2/exp/mem/",
     "bytes_per_frame": 8,
     "refine_iter": 5,
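
With model_id and memory_layer dropped from vq_config, both are now supplied on the command line. A hedged example invocation, assuming this parser is the one used by the extraction script hubert_code_indices.py:

    ./vq_pruned_transducer_stateless2/hubert_code_indices.py \
        --model-id hubert_xtralarge_ll60k_finetune_ls960 \
        --memory-layer 36 \
        --manifest-idx 1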
@@ -62,9 +61,19 @@ def get_parser():
     )
 
     parser.add_argument(
-        "--manifest-idx",
+        "--model-id",
+        type=str,
+        default="hubert_xtralarge_ll60k_finetune_ls960",
+    )
+
+    parser.add_argument(
+        "--manifest-idx", type=int, help="Split manifest is 1-based."
+    )
+
+    parser.add_argument(
+        "--memory-layer",
         type=int,
-        help="Split manifest is 1-based."
+        help="layer to extract teacher embeddings, 1-based.",
     )
 
     parser.add_argument(