Fix the CTC zipformer2 training

- exclude cuts whose supervisions have too many tokens
- change the filtering rule to `if (T - 2) < len(tokens): return False`
- this prevents infinite (inf) values from appearing in the CTC loss
This commit is contained in:
Karel Vesely 2024-08-12 10:45:57 +02:00
parent 3b257dd5ae
commit d400bc5edf

View File

@ -1300,9 +1300,11 @@ def run(rank, world_size, args):
 T = ((c.num_frames - 7) // 2 + 1) // 2
 tokens = sp.encode(c.supervisions[0].text, out_type=str)
-if T < len(tokens):
+# For CTC `(T - 2) < len(tokens)` is needed, otherwise inf appears in the loss.
+# For Transducer, `T < len(tokens)` was okay.
+if (T - 2) < len(tokens):
     logging.warning(
-        f"Exclude cut with ID {c.id} from training. "
+        f"Exclude cut with ID {c.id} from training (too many supervision tokens). "
         f"Number of frames (before subsampling): {c.num_frames}. "
         f"Number of frames (after subsampling): {T}. "
         f"Text: {c.supervisions[0].text}. "