diff --git a/activate-icefall.sh b/activate-icefall.sh index 6116ca47a..406692094 100644 --- a/activate-icefall.sh +++ b/activate-icefall.sh @@ -1 +1 @@ -export PYTHONPATH=/var/data/share20/qc/k2/Github/icefall:$PYTHONPATH +export PYTHONPATH=/root/k2/Github/icefall:$PYTHONPATH diff --git a/egs/reazonspeech/ASR/decode_greedy.sh b/egs/reazonspeech/ASR/decode_greedy.sh index 4f24d0a4e..97966569b 100755 --- a/egs/reazonspeech/ASR/decode_greedy.sh +++ b/egs/reazonspeech/ASR/decode_greedy.sh @@ -1,15 +1,20 @@ -num_epochs=30 -for ((i=$num_epochs; i>=15; i--)); +num_epochs=40 +for ((i=$num_epochs; i>=1; i--)); do for ((j=1; j<=$i; j++)); do python3 ./zipformer/decode.py \ --epoch $i \ --avg $j \ - --exp-dir zipformer/exp \ - --max-duration 450 \ - --lang data/lang_char \ + --exp-dir zipformer/exp-large \ + --max-duration 600 \ + --causal 0 \ --decoding-method greedy_search \ - --blank-penalty 2 + --num-encoder-layers 2,2,4,5,4,2 \ + --feedforward-dim 512,768,1536,2048,1536,768 \ + --encoder-dim 192,256,512,768,512,256 \ + --encoder-unmasked-dim 192,192,256,320,256,192 \ + --lang data/lang_char \ + --blank-penalty 0 done done diff --git a/egs/reazonspeech/ASR/decode_modified_beam.sh b/egs/reazonspeech/ASR/decode_modified_beam.sh index a3a0c25a0..3279b0552 100755 --- a/egs/reazonspeech/ASR/decode_modified_beam.sh +++ b/egs/reazonspeech/ASR/decode_modified_beam.sh @@ -1,15 +1,20 @@ -num_epochs=30 -for ((i=$num_epochs; i>=20; i--)); +num_epochs=60 +for ((i=$num_epochs; i>=40; i--)); do - for avg in 12 11 10 9 8 7 6 5; + for ((j=1; j<=$i; j++)); do python3 ./zipformer/decode.py \ --epoch $i \ - --avg $avg \ - --exp-dir zipformer/exp \ - --max-duration 450 \ - --lang data/lang_char \ + --avg $j \ + --exp-dir zipformer/exp-large \ + --max-duration 600 \ + --causal 0 \ --decoding-method modified_beam_search \ - --blank-penalty 2.5 + --num-encoder-layers 2,2,4,5,4,2 \ + --feedforward-dim 512,768,1536,2048,1536,768 \ + --encoder-dim 192,256,512,768,512,256 \ + --encoder-unmasked-dim 192,192,256,320,256,192 \ + --lang data/lang_char \ + --blank-penalty 0 done done diff --git a/egs/reazonspeech/ASR/zipformer/train.py b/egs/reazonspeech/ASR/zipformer/train.py index cfd7fe909..3ed21ff85 100755 --- a/egs/reazonspeech/ASR/zipformer/train.py +++ b/egs/reazonspeech/ASR/zipformer/train.py @@ -328,7 +328,7 @@ def get_parser(): ) parser.add_argument( - "--base-lr", type=float, default=0.035, help="The base learning rate." + "--base-lr", type=float, default=0.015, help="The base learning rate." ) parser.add_argument( @@ -1069,6 +1069,9 @@ def train_one_epoch( tb_writer, "train/valid_", params.batch_idx_train ) + # print('--------------------debug------------------') + # print(tot_loss) + # print(tot_loss["frames"]) loss_value = tot_loss["loss"] / tot_loss["frames"] params.train_loss = loss_value if params.train_loss < params.best_train_loss: @@ -1179,7 +1182,7 @@ def run(rank, world_size, args): # You should use ../local/display_manifest_statistics.py to get # an utterance duration distribution for your dataset to select # the threshold - if c.duration < 0.3 or c.duration > 30.0: + if c.duration < 1.0 or c.duration > 30.0: # logging.warning( # f"Exclude cut with ID {c.id} from training. Duration: {c.duration}" # )