36 Commits

Author SHA1 Message Date
root
e65725810c fix mmsu 2025-05-13 09:13:12 +00:00
root
cbf3af31fd add voicebench eval 2025-05-13 05:37:11 +00:00
root
89781b9bb1 add cosyvoice2 decode 2025-05-12 10:06:59 +00:00
root
b20a0d0e35 add on the fly feature 2025-05-08 19:21:41 -07:00
root
bd2df570ad add debug script 2025-05-08 03:37:26 -07:00
root
37db65984c remove k2 dependency 2025-05-08 03:02:34 -07:00
root
e41c1cabd5 add dependency 2025-05-08 07:56:14 +00:00
root
7cc366d82d add en data, cosy2 token for training 2025-05-08 07:23:22 +00:00
root
2dd40b62ef add vocalnet en data 2025-05-08 06:29:46 +00:00
root
08be51a91f change pic 2025-04-29 10:09:57 +00:00
root
11bd3c9ad8 lint 2025-04-29 09:46:44 +00:00
root
360f0aa397 update README 2025-04-29 08:49:12 +00:00
root
448a4eeea7 update hf dataset loading into lhotse 2025-04-29 07:33:34 +00:00
Yuekai Zhang
d742043e75 refactor decode part 2025-04-25 18:31:43 +08:00
root
71a0a442a6 add history cache 2025-04-25 10:05:07 +00:00
Yuekai Zhang
47920c2336 add gradio demo 2025-04-25 16:05:37 +08:00
Yuekai Zhang
72addd40f5 change place 2025-04-25 14:22:16 +08:00
Yuekai Zhang
9a07363a8d remove unsed 2025-04-25 14:21:50 +08:00
Yuekai Zhang
6ea7ec8543 remove offline tab 2025-04-25 14:10:30 +08:00
Yuekai Zhang
6955639d22 add qwen omni web demo 2025-04-25 14:08:25 +08:00
root
3642dfd8c3 refactor code 2025-04-25 05:36:18 +00:00
root
2e9be46703 debug 2025-04-24 08:24:11 +00:00
root
478d56efd8 fix bugs when padding right 2025-04-23 07:33:27 +00:00
Yuekai Zhang
23fdef2fd3 add codec decode 2025-04-21 17:57:57 +08:00
Yuekai Zhang
09d81b44a7 change padding side name 2025-04-21 17:10:25 +08:00
Yuekai Zhang
7db40052d6 add flash attn support 2025-04-21 14:54:28 +08:00
root
b305cdacc0 fix padding side 2025-04-21 06:23:10 +00:00
root
bdb60f6ddc add codec lm 2025-04-21 01:00:06 +00:00
root
458d697acc fix batch_size>1 decoding bug 2025-04-15 13:41:33 +00:00
root
0c02da82ac refine decoding method 2025-04-15 06:53:20 +00:00
root
3ad075af60 s2t training 2025-04-15 02:16:03 +00:00
Yuekai Zhang
1d11662016 fix multi rounds data 2025-04-14 14:32:42 +08:00
root
202d764cfb remove text norm 2025-04-14 05:35:07 +00:00
root
6b69276b19 add training stage 2025-04-11 06:51:51 +00:00
root
e6897b10fa make asr decode results align 2025-04-11 06:51:51 +00:00
root
cca562d538 migrate from speech llm 2025-04-11 06:51:50 +00:00