remove streaming/greedy_search results folder

Authored by root on 2024-08-01 16:00:27 +09:00; committed by root
parent eebe6add4c
commit cfef53dcbe
6 changed files with 148 additions and 171 deletions

View File

@@ -103,9 +103,10 @@ from pathlib import Path
 from typing import Dict, List, Optional, Tuple

 import k2
+import sentencepiece as spm
 import torch
 import torch.nn as nn
-from asr_datamodule import ReazonSpeechAsrDataModule
+from asr_datamodule import LibriSpeechAsrDataModule
 from beam_search import (
     beam_search,
     fast_beam_search_nbest,
@@ -134,7 +135,6 @@ from icefall.checkpoint import (
 from icefall.lexicon import Lexicon
 from icefall.utils import (
     AttributeDict,
-    make_pad_mask,
     setup_logger,
     store_transcripts,
     str2bool,
@@ -205,7 +205,7 @@ def get_parser():
     parser.add_argument(
         "--lang-dir",
         type=Path,
-        default="data/lang_char",
+        default="data/lang_bpe_500",
         help="The lang dir containing word table and LG graph",
     )
@@ -371,6 +371,7 @@ def get_parser():
           modified_beam_search_LODR.
         """,
     )
+<<<<<<< HEAD

     parser.add_argument(
         "--skip-scoring",
@@ -398,7 +399,7 @@ def get_parser():
 def decode_one_batch(
     params: AttributeDict,
     model: nn.Module,
-    sp: Tokenizer,
+    sp: spm.SentencePieceProcessor,
     batch: dict,
     word_table: Optional[k2.SymbolTable] = None,
     decoding_graph: Optional[k2.Fsa] = None,
@@ -477,10 +478,9 @@ def decode_one_batch(
             beam=params.beam,
             max_contexts=params.max_contexts,
             max_states=params.max_states,
-            blank_penalty=params.blank_penalty,
         )
         for hyp in sp.decode(hyp_tokens):
-            hyps.append(sp.text2word(hyp))
+            hyps.append(hyp.split())
     elif params.decoding_method == "fast_beam_search_nbest_LG":
         hyp_tokens = fast_beam_search_nbest_LG(
             model=model,
@@ -492,7 +492,6 @@ def decode_one_batch(
             max_states=params.max_states,
             num_paths=params.num_paths,
             nbest_scale=params.nbest_scale,
-            blank_penalty=params.blank_penalty,
         )
         for hyp in hyp_tokens:
             hyps.append([word_table[i] for i in hyp])
@@ -507,10 +506,9 @@ def decode_one_batch(
             max_states=params.max_states,
             num_paths=params.num_paths,
             nbest_scale=params.nbest_scale,
-            blank_penalty=params.blank_penalty,
         )
         for hyp in sp.decode(hyp_tokens):
-            hyps.append(sp.text2word(hyp))
+            hyps.append(hyp.split())
     elif params.decoding_method == "fast_beam_search_nbest_oracle":
         hyp_tokens = fast_beam_search_nbest_oracle(
             model=model,
@@ -523,19 +521,17 @@ def decode_one_batch(
             num_paths=params.num_paths,
             ref_texts=sp.encode(supervisions["text"]),
             nbest_scale=params.nbest_scale,
-            blank_penalty=params.blank_penalty,
         )
         for hyp in sp.decode(hyp_tokens):
-            hyps.append(sp.text2word(hyp))
+            hyps.append(hyp.split())
     elif params.decoding_method == "greedy_search" and params.max_sym_per_frame == 1:
         hyp_tokens = greedy_search_batch(
             model=model,
             encoder_out=encoder_out,
             encoder_out_lens=encoder_out_lens,
-            blank_penalty=params.blank_penalty,
         )
         for hyp in sp.decode(hyp_tokens):
-            hyps.append(sp.text2word(hyp))
+            hyps.append(hyp.split())
     elif params.decoding_method == "modified_beam_search":
         hyp_tokens = modified_beam_search(
             model=model,
@@ -543,10 +539,9 @@ def decode_one_batch(
             encoder_out_lens=encoder_out_lens,
             beam=params.beam_size,
             context_graph=context_graph,
-            blank_penalty=params.blank_penalty,
         )
         for hyp in sp.decode(hyp_tokens):
-            hyps.append(sp.text2word(hyp))
+            hyps.append(hyp.split())
     elif params.decoding_method == "modified_beam_search_lm_shallow_fusion":
         hyp_tokens = modified_beam_search_lm_shallow_fusion(
             model=model,
@@ -556,7 +551,7 @@ def decode_one_batch(
             LM=LM,
         )
         for hyp in sp.decode(hyp_tokens):
-            hyps.append(sp.text2word(hyp))
+            hyps.append(hyp.split())
     elif params.decoding_method == "modified_beam_search_LODR":
         hyp_tokens = modified_beam_search_LODR(
             model=model,
@@ -569,7 +564,7 @@ def decode_one_batch(
             context_graph=context_graph,
         )
         for hyp in sp.decode(hyp_tokens):
-            hyps.append(sp.text2word(hyp))
+            hyps.append(hyp.split())
     elif params.decoding_method == "modified_beam_search_lm_rescore":
         lm_scale_list = [0.01 * i for i in range(10, 50)]
         ans_dict = modified_beam_search_lm_rescore(
@@ -615,7 +610,7 @@ def decode_one_batch(
                 raise ValueError(
                     f"Unsupported decoding method: {params.decoding_method}"
                 )
-            hyps.append(sp.text2word(sp.decode(hyp)))
+            hyps.append(sp.decode(hyp).split())

     # prefix = ( "greedy_search" | "fast_beam_search_nbest" | "modified_beam_search" )
     prefix = f"{params.decoding_method}"
@@ -636,9 +631,9 @@ def decode_one_batch(
     elif "modified_beam_search" in params.decoding_method:
         prefix += f"_beam-size-{params.beam_size}"
     if params.decoding_method in (
         "modified_beam_search_lm_rescore",
         "modified_beam_search_lm_rescore_LODR",
     ):
         ans = dict()
         assert ans_dict is not None
         for key, hyps in ans_dict.items():
@@ -655,17 +650,17 @@ def decode_one_batch(
 def decode_dataset(
     dl: torch.utils.data.DataLoader,
     params: AttributeDict,
     model: nn.Module,
-    sp: Tokenizer,
+    sp: spm.SentencePieceProcessor,
     word_table: Optional[k2.SymbolTable] = None,
     decoding_graph: Optional[k2.Fsa] = None,
     context_graph: Optional[ContextGraph] = None,
     LM: Optional[LmScorer] = None,
     ngram_lm=None,
     ngram_lm_scale: float = 0.0,
 ) -> Dict[str, List[Tuple[str, List[str], List[str]]]]:
     """Decode dataset.

     Args:
@@ -708,23 +703,23 @@ def decode_dataset(
         cut_ids = [cut.id for cut in batch["supervisions"]["cut"]]

         hyps_dict = decode_one_batch(
             params=params,
             model=model,
             sp=sp,
             decoding_graph=decoding_graph,
             context_graph=context_graph,
             word_table=word_table,
             batch=batch,
             LM=LM,
             ngram_lm=ngram_lm,
             ngram_lm_scale=ngram_lm_scale,
         )

         for name, hyps in hyps_dict.items():
             this_batch = []
             assert len(hyps) == len(texts)
             for cut_id, hyp_words, ref_text in zip(cut_ids, hyps, texts):
-                ref_words = sp.text2word(ref_text)
+                ref_words = ref_text.split()
                 this_batch.append((cut_id, ref_words, hyp_words))

             results[name].extend(this_batch)
@@ -739,10 +734,10 @@ def decode_dataset(
 def save_asr_output(
     params: AttributeDict,
     test_set_name: str,
     results_dict: Dict[str, List[Tuple[str, List[str], List[str]]]],
 ):
     """
     Save text produced by ASR.
     """
@@ -757,10 +752,10 @@ def save_asr_output(
 def save_wer_results(
     params: AttributeDict,
     test_set_name: str,
     results_dict: Dict[str, List[Tuple[str, List[str], List[str], Tuple]]],
 ):
     """
     Save WER and per-utterance word alignments.
     """
@@ -771,8 +766,8 @@ def save_wer_results(
     errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
     with open(errs_filename, "w", encoding="utf8") as fd:
         wer = write_error_stats(
             fd, f"{test_set_name}-{key}", results, enable_log=True
         )
         test_set_wers[key] = wer

     logging.info(f"Wrote detailed error stats to {errs_filename}")
@@ -797,8 +792,8 @@ def save_wer_results(
 @torch.no_grad()
 def main():
     parser = get_parser()
-    ReazonSpeechAsrDataModule.add_arguments(parser)
-    Tokenizer.add_arguments(parser)
+    LibriSpeechAsrDataModule.add_arguments(parser)
+    LmScorer.add_arguments(parser)
     args = parser.parse_args()
     args.exp_dir = Path(args.exp_dir)
@@ -809,18 +804,18 @@ def main():
     set_caching_enabled(True)  # lhotse

     assert params.decoding_method in (
         "greedy_search",
         "beam_search",
         "fast_beam_search",
         "fast_beam_search_nbest",
         "fast_beam_search_nbest_LG",
         "fast_beam_search_nbest_oracle",
         "modified_beam_search",
         "modified_beam_search_LODR",
         "modified_beam_search_lm_shallow_fusion",
         "modified_beam_search_lm_rescore",
         "modified_beam_search_lm_rescore_LODR",
     )
     params.res_dir = params.exp_dir / params.decoding_method

     if os.path.exists(params.context_file):
@@ -835,11 +830,11 @@ def main():
     if params.causal:
         assert (
             "," not in params.chunk_size
         ), "chunk_size should be one value in decoding."
         assert (
             "," not in params.left_context_frames
         ), "left_context_frames should be one value in decoding."
         params.suffix += f"_chunk-{params.chunk_size}"
         params.suffix += f"_left-context-{params.left_context_frames}"
@@ -855,9 +850,9 @@ def main():
     elif "beam_search" in params.decoding_method:
         params.suffix += f"__{params.decoding_method}__beam-size-{params.beam_size}"
     if params.decoding_method in (
         "modified_beam_search",
         "modified_beam_search_LODR",
     ):
         if params.has_contexts:
             params.suffix += f"-context-score-{params.context_score}"
         else:
@@ -869,10 +864,8 @@ def main():
     if "LODR" in params.decoding_method:
         params.suffix += (
             f"_LODR-{params.tokens_ngram}gram-scale-{params.ngram_lm_scale}"
         )
-    params.suffix += f"-blank-penalty-{params.blank_penalty}"

     if params.use_averaged_model:
         params.suffix += "_use-averaged-model"
@@ -886,9 +879,10 @@ def main():
     logging.info(f"Device: {device}")

-    sp = Tokenizer.load(params.lang, params.lang_type)
+    sp = spm.SentencePieceProcessor()
+    sp.load(params.bpe_model)

-    # <blk> and <unk> are defined in local/prepare_lang_char.py
+    # <blk> and <unk> are defined in local/train_bpe_model.py
     params.blank_id = sp.piece_to_id("<blk>")
     params.unk_id = sp.piece_to_id("<unk>")
     params.vocab_size = sp.get_piece_size()
@@ -901,18 +895,18 @@ def main():
     if not params.use_averaged_model:
         if params.iter > 0:
             filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
                 : params.avg
             ]
             if len(filenames) == 0:
                 raise ValueError(
                     f"No checkpoints found for"
                     f" --iter {params.iter}, --avg {params.avg}"
                 )
             elif len(filenames) < params.avg:
                 raise ValueError(
                     f"Not enough checkpoints ({len(filenames)}) found for"
                     f" --iter {params.iter}, --avg {params.avg}"
                 )
             logging.info(f"averaging {filenames}")
             model.to(device)
             model.load_state_dict(average_checkpoints(filenames, device=device))
@@ -930,32 +924,32 @@ def main():
     else:
         if params.iter > 0:
             filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
                 : params.avg + 1
             ]
             if len(filenames) == 0:
                 raise ValueError(
                     f"No checkpoints found for"
                     f" --iter {params.iter}, --avg {params.avg}"
                 )
             elif len(filenames) < params.avg + 1:
                 raise ValueError(
                     f"Not enough checkpoints ({len(filenames)}) found for"
                     f" --iter {params.iter}, --avg {params.avg}"
                 )
             filename_start = filenames[-1]
             filename_end = filenames[0]
             logging.info(
                 "Calculating the averaged model over iteration checkpoints"
                 f" from {filename_start} (excluded) to {filename_end}"
             )
             model.to(device)
             model.load_state_dict(
                 average_checkpoints_with_averaged_model(
                     filename_start=filename_start,
                     filename_end=filename_end,
                     device=device,
                 )
             )
         else:
             assert params.avg > 0, params.avg
             start = params.epoch - params.avg
@@ -963,34 +957,34 @@ def main():
             filename_start = f"{params.exp_dir}/epoch-{start}.pt"
             filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt"
             logging.info(
                 f"Calculating the averaged model over epoch range from "
                 f"{start} (excluded) to {params.epoch}"
             )
             model.to(device)
             model.load_state_dict(
                 average_checkpoints_with_averaged_model(
                     filename_start=filename_start,
                     filename_end=filename_end,
                     device=device,
                 )
             )

     model.to(device)
     model.eval()

     # only load the neural network LM if required
     if params.use_shallow_fusion or params.decoding_method in (
         "modified_beam_search_lm_rescore",
         "modified_beam_search_lm_rescore_LODR",
         "modified_beam_search_lm_shallow_fusion",
         "modified_beam_search_LODR",
     ):
         LM = LmScorer(
             lm_type=params.lm_type,
             params=params,
             device=device,
             lm_scale=params.lm_scale,
         )
         LM.to(device)
         LM.eval()
     else:
@@ -1016,10 +1010,10 @@ def main():
         lm_filename = f"{params.tokens_ngram}gram.fst.txt"
         logging.info(f"Loading token level lm: {lm_filename}")
         ngram_lm = NgramLm(
             str(params.lang_dir / lm_filename),
             backoff_id=params.backoff_id,
             is_binary=False,
         )
         logging.info(f"num states: {ngram_lm.lm.num_states}")
         ngram_lm_scale = params.ngram_lm_scale
     else:
@@ -1033,8 +1027,8 @@ def main():
         lg_filename = params.lang_dir / "LG.pt"
         logging.info(f"Loading {lg_filename}")
         decoding_graph = k2.Fsa.from_dict(
             torch.load(lg_filename, map_location=device)
         )
         decoding_graph.scores *= params.ngram_lm_scale
     else:
         word_table = None
@@ -1060,40 +1054,36 @@ def main():
     # we need cut ids to display recognition results.
     args.return_cuts = True

-    reazonspeech_corpus = ReazonSpeechAsrDataModule(args)
-    for subdir in ["valid"]:
-        results_dict = decode_dataset(
-            dl=reazonspeech_corpus.test_dataloaders(
-                getattr(reazonspeech_corpus, f"{subdir}_cuts")()
-            ),
-            params=params,
-            model=model,
-            sp=sp,
-            word_table=word_table,
-            decoding_graph=decoding_graph,
-            context_graph=context_graph,
-            LM=LM,
-            ngram_lm=ngram_lm,
-            ngram_lm_scale=ngram_lm_scale,
-        )
-        save_asr_output(
-            params=params,
-            test_set_name=subdir,
-            results_dict=results_dict,
-        )
-        # with (
-        #     params.res_dir
-        #     / (
-        #         f"{subdir}-{params.decode_chunk_len}_{params.beam_size}"
-        #         f"_{params.avg}_{params.epoch}.cer"
-        #     )
-        # ).open("w") as fout:
-        #     if len(tot_err) == 1:
-        #         fout.write(f"{tot_err[0][1]}")
-        #     else:
-        #         fout.write("\n".join(f"{k}\t{v}") for k, v in tot_err)
+    librispeech = LibriSpeechAsrDataModule(args)
+
+    test_clean_cuts = librispeech.test_clean_cuts()
+    test_other_cuts = librispeech.test_other_cuts()
+
+    test_clean_dl = librispeech.test_dataloaders(test_clean_cuts)
+    test_other_dl = librispeech.test_dataloaders(test_other_cuts)
+
+    test_sets = ["test-clean", "test-other"]
+    test_dl = [test_clean_dl, test_other_dl]
+
+    for test_set, test_dl in zip(test_sets, test_dl):
+        results_dict = decode_dataset(
+            dl=test_dl,
+            params=params,
+            model=model,
+            sp=sp,
+            word_table=word_table,
+            decoding_graph=decoding_graph,
+            context_graph=context_graph,
+            LM=LM,
+            ngram_lm=ngram_lm,
+            ngram_lm_scale=ngram_lm_scale,
+        )
+
+        save_asr_output(
+            params=params,
+            test_set_name=test_set,
+            results_dict=results_dict,
+        )

         if not params.skip_scoring:
             save_wer_results(
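Note on the recurring tokenizer edits above: the old recipe's `Tokenizer` (loaded from `data/lang_char`, apparently character-level) needed `sp.text2word()` to produce word lists, whereas `spm.SentencePieceProcessor.decode()` returns an ordinary space-delimited string, so the new code recovers words with plain `str.split()`. A minimal sketch of the new behaviour, assuming the `data/lang_bpe_500/bpe.model` path that appears in the log below; the token IDs are made up for illustration:

    import sentencepiece as spm

    sp = spm.SentencePieceProcessor()
    sp.load("data/lang_bpe_500/bpe.model")  # path from the params dump below

    # decode() accepts a batch (a list of ID lists, as returned by e.g.
    # greedy_search_batch) and yields one plain string per utterance.
    hyp_tokens = [[13, 52, 8], [97, 4]]  # illustrative IDs, not from this commit
    for hyp in sp.decode(hyp_tokens):
        words = hyp.split()  # BPE decoding restores spaces, so split() yields words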

View File

@@ -1,2 +0,0 @@
-2024-07-29 17:52:11,668 INFO [streaming_decode.py:736] Decoding started
-2024-07-29 17:52:11,669 INFO [streaming_decode.py:742] Device: cuda:0

View File

@@ -1,2 +0,0 @@
-2024-07-29 17:54:22,556 INFO [streaming_decode.py:736] Decoding started
-2024-07-29 17:54:22,556 INFO [streaming_decode.py:742] Device: cuda:0

View File

@@ -1,2 +0,0 @@
-2024-07-29 17:55:15,276 INFO [streaming_decode.py:736] Decoding started
-2024-07-29 17:55:15,277 INFO [streaming_decode.py:742] Device: cuda:0

View File

@@ -1,2 +0,0 @@
-2024-07-29 17:59:02,028 INFO [streaming_decode.py:736] Decoding started
-2024-07-29 17:59:02,029 INFO [streaming_decode.py:742] Device: cuda:0

View File

@@ -1,5 +0,0 @@
-2024-07-29 18:01:06,736 INFO [streaming_decode.py:736] Decoding started
-2024-07-29 18:01:06,736 INFO [streaming_decode.py:742] Device: cuda:0
-2024-07-29 18:01:06,740 INFO [streaming_decode.py:753] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '8f976a1e1407e330e2a233d68f81b1eb5269fdaa', 'k2-git-date': 'Thu Jun 6 02:13:08 2024', 'lhotse-version': '1.26.0.dev+git.bd12d5d.clean', 'torch-version': '2.3.1+cu121', 'torch-cuda-available': True, 'torch-cuda-version': '12.1', 'python-version': '3.10', 'icefall-git-branch': 'jp-streaming', 'icefall-git-sha1': '4af81af-dirty', 'icefall-git-date': 'Thu Jul 18 22:05:59 2024', 'icefall-path': '/root/tmp/icefall', 'k2-path': '/root/miniconda3/envs/myenv/lib/python3.10/site-packages/k2/__init__.py', 'lhotse-path': '/root/miniconda3/envs/myenv/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'KDA00', 'IP address': '192.168.0.1'}, 'epoch': 28, 'iter': 0, 'avg': 15, 'use_averaged_model': True, 'exp_dir': PosixPath('zipformer'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'num_active_paths': 4, 'beam': 4, 'max_contexts': 4, 'max_states': 32, 'context_size': 2, 'num_decode_streams': 2000, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '32', 'left_context_frames': '256', 'use_transducer': True, 'use_ctc': False, 'manifest_dir': PosixPath('data/manifests'), 'max_duration': 200.0, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': False, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': False, 'lang': PosixPath('data/lang_char'), 'lang_type': None, 'res_dir': PosixPath('zipformer/streaming/greedy_search'), 'suffix': 'epoch-28-avg-15-chunk-32-left-context-256-use-averaged-model', 'blank_id': 0, 'unk_id': 2990, 'vocab_size': 2992}
-2024-07-29 18:01:06,740 INFO [streaming_decode.py:755] About to create model
-2024-07-29 18:01:07,118 INFO [streaming_decode.py:822] Calculating the averaged model over epoch range from 13 (excluded) to 28
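A note on the last log line, which matches the epoch-averaging arithmetic in the diff above: the script computes start = params.epoch - params.avg, so with epoch 28 and avg 15 it averages over the range "from 13 (excluded) to 28". A worked sketch, assuming icefall's running-average convention (each checkpoint stores a model_avg accumulated over batches):

    # averaged = (model_avg_end * n_end - model_avg_start * n_start) / (n_end - n_start)
    # here: epochs (13, 28] -> 28 - 13 = 15 checkpoints (epoch-14.pt .. epoch-28.pt);
    # epoch-13.pt is only the subtraction point, not part of the average itself.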