mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
fix the CTC zipformer2 training
- too many supervision tokens - change the filtering rule to `if (T - 2) < len(tokens): return False` - this prevents `inf` from appearing in the CTC loss value
This commit is contained in:
parent
3b257dd5ae
commit
d400bc5edf
@ -1300,9 +1300,11 @@ def run(rank, world_size, args):
|
|||||||
T = ((c.num_frames - 7) // 2 + 1) // 2
|
T = ((c.num_frames - 7) // 2 + 1) // 2
|
||||||
tokens = sp.encode(c.supervisions[0].text, out_type=str)
|
tokens = sp.encode(c.supervisions[0].text, out_type=str)
|
||||||
|
|
||||||
if T < len(tokens):
|
# For CTC, the check `(T - 2) < len(tokens)` is needed; otherwise inf appears in the loss.
|
||||||
|
# For Transducer, the check `T < len(tokens)` was sufficient.
|
||||||
|
if (T - 2) < len(tokens):
|
||||||
logging.warning(
|
logging.warning(
|
||||||
f"Exclude cut with ID {c.id} from training. "
|
f"Exclude cut with ID {c.id} from training (too many supervision tokens). "
|
||||||
f"Number of frames (before subsampling): {c.num_frames}. "
|
f"Number of frames (before subsampling): {c.num_frames}. "
|
||||||
f"Number of frames (after subsampling): {T}. "
|
f"Number of frames (after subsampling): {T}. "
|
||||||
f"Text: {c.supervisions[0].text}. "
|
f"Text: {c.supervisions[0].text}. "
|
||||||
|
Loading…
x
Reference in New Issue
Block a user