icefall

mirror of https://github.com/k2-fsa/icefall.git synced 2025-08-09 18:12:19 +00:00

Author	SHA1	Message	Date
Yuekai Zhang	a5de488304	Merge 559f9e2deff33077461428d422d9f03c95988b01 into 34fc1fdf0d8ff520e2bb18267d046ca207c78ef9	2025-07-24 22:09:54 +05:30
Fangjun Kuang	34fc1fdf0d	Fix transformer decoder layer (#1995 )	2025-07-18 20:12:29 +08:00
Bailey Machiko Hirota	5fe13078cc	Musan implementation for ReazonSpeech (#1988 )	2025-07-18 17:16:19 +08:00
Yifan Yang	9fd0f2dc1d	support left pad for make_pad_mask (#1990 )	2025-07-16 23:59:04 +08:00
Fangjun Kuang	e22bc78f98	Export streaming zipformer2 to RKNN (#1977 )	2025-07-11 13:24:01 +08:00
Teo Wen Shen	da87e7fc99	add weights_only=False to torch.load (#1984 )	2025-07-10 15:27:08 +08:00
Yifan Yang	89728dd4f8	Refactor data preparation for GigaSpeech recipe (#1986 )	2025-07-10 11:17:37 +08:00
Mistmoon	9293edc62f	Add cr-ctc loss and ctc-decode in aishell (#1980 )	2025-07-08 14:47:24 +08:00
Fangjun Kuang	fba5e67d5e	Fix CI tests. (#1974 ) - Introduce unified AMP helpers (create_grad_scaler, torch_autocast) to handle deprecations in PyTorch ≥2.3.0 - Replace direct uses of torch.cuda.amp.GradScaler and torch.cuda.amp.autocast with the new utilities across all training and inference scripts - Update all torch.load calls to include weights_only=False for compatibility with newer PyTorch versions	2025-07-01 13:47:55 +08:00
Fangjun Kuang	71377d21cd	Export streaming zipformer models with whisper feature to onnx (#1973 )	2025-06-30 19:01:15 +08:00
Fangjun Kuang	abd9437e6d	Add more wheels for piper-phonemize (#1969 )	2025-06-24 14:49:16 +08:00
Wei Kang	e1cf4dbace	rm zipvoice (#1967 )	2025-06-23 19:22:35 +08:00
Wei Kang	343b8fa2dc	Using non strict match in context graph for contextual words (#1952 )	2025-06-19 12:27:15 +08:00
Wei Kang	f80a2ee110	Decrease num_buckets & remove shuffle_buffer_size (#1955 )	2025-06-19 12:26:37 +08:00
Wei Kang	3587c4b3b7	Fix decoding byte bpes tokens to words. (#1966 )	2025-06-19 12:26:01 +08:00
Wei Kang	762f965cf7	[zipvoice] Add requirements.txt and pinyin.txt, remove k2 from pretrained model inference. (#1965 ) * Add requirements.txt and pinyin.txt needed by zipvoice * simplify the requirements for pretrained model inference	2025-06-18 18:38:46 +08:00
Wei Kang	06539d2b9d	Add Zipvoice (#1964 ) * Add ZipVoice - a flow-matching based zero-shot TTS model.	2025-06-17 20:17:12 +08:00
root	559f9e2def	fix repeat bos and pad id	2025-06-04 10:02:42 +00:00
root	80677a55f8	remove stats	2025-06-03 00:48:39 -07:00
root	5becf6927d	remove concat three items	2025-06-03 00:18:21 -07:00
root	4c0396f8f2	support text2speech ultrachat	2025-06-02 23:16:03 -07:00
root	49256fa917	fix tts stage decode	2025-05-28 02:34:07 +00:00
root	5a7c72cb47	add tts task decode	2025-05-27 02:12:22 -07:00
root	1281d7a515	add tts training	2025-05-27 00:18:23 -07:00
Zengwei Yao	ffb7d05635	refactor branch exchange in cr-ctc (#1954 )	2025-05-27 12:09:59 +08:00
root	39700d5c94	refactor train to reuse code	2025-05-26 19:53:16 -07:00
root	e6e1f3fa4f	add tts stage	2025-05-23 01:53:05 -07:00
root	dd858f0cd1	support instruct s2s	2025-05-22 23:16:33 -07:00
root	9fff18edec	refactor code	2025-05-22 19:14:52 -07:00
Mahsa Yarmohammadi	021e1a8846	Add acknowledgment to README (#1950 )	2025-05-22 22:06:35 +08:00
root	7a12d88d6c	update	2025-05-21 22:18:57 -07:00
root	7aa6c80ddb	add multi gpu processing	2025-05-21 21:54:59 -07:00
Tianxiang Zhao	30e7ea4b5a	Fix a bug in finetune.py --use-mux (#1949 )	2025-05-22 12:05:01 +08:00
Fangjun Kuang	fd8f8780fa	Fix logging torch.dtype. (#1947 )	2025-05-21 12:04:57 +08:00
root	ca84aff5d6	remove cosyvoice lib	2025-05-20 00:52:09 -07:00
root	9cdd393f43	add server url	2025-05-20 07:48:49 +00:00
root	50fc1aba60	add multi-node	2025-05-18 18:47:22 -07:00
root	4a29430349	add loss type	2025-05-19 01:31:21 +00:00
root	e52581e69b	support local_rank for multi-node	2025-05-16 00:02:12 -07:00
root	0e8c1db4d0	fix speed perturb issue	2025-05-15 22:45:04 -07:00
root	bfb4ebeb83	remove triton	2025-05-15 14:32:49 +00:00
root	f81363d324	add speech continuation pretraining	2025-05-15 14:16:51 +00:00
root	e65725810c	fix mmsu	2025-05-13 09:13:12 +00:00
root	cbf3af31fd	add voicebench eval	2025-05-13 05:37:11 +00:00
Yifan Yang	e79833aad2	ensure SwooshL/SwooshR output dtype matches input dtype (#1940 )	2025-05-12 19:28:48 +08:00
root	89781b9bb1	add cosyvoice2 decode	2025-05-12 10:06:59 +00:00
Yifan Yang	4627969ccd	fix bug: undefined name 'partial' (#1941 )	2025-05-12 14:19:53 +08:00
root	b20a0d0e35	add on the fly feature	2025-05-08 19:21:41 -07:00
root	bd2df570ad	add debug script	2025-05-08 03:37:26 -07:00
root	37db65984c	remove k2 dependency	2025-05-08 03:02:34 -07:00

1 2 3 4 5 ...

1277 Commits