Merge branch 'k2-fsa:master' into fix/multi-zh-en

zr_jin 2024-03-05 11:58:11 +08:00 committed by GitHub
commit 0157687925
71 changed files with 317 additions and 168 deletions

View File

@ -49,7 +49,7 @@ jobs:
- name: Install Python dependencies
run: |
python3 -m pip install --upgrade pip black==22.3.0 flake8==5.0.4 click==8.1.0
python3 -m pip install --upgrade pip black==22.3.0 flake8==5.0.4 click==8.1.0 isort==5.10.1
# Click issue fixed in https://github.com/psf/black/pull/2966
- name: Run flake8
@ -67,3 +67,9 @@ jobs:
working-directory: ${{github.workspace}}
run: |
black --check --diff .
- name: Run isort
shell: bash
working-directory: ${{github.workspace}}
run: |
isort --check --diff .
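Contributors who want to reproduce this new CI step before pushing can also use isort's small Python API (the workflow pins isort==5.10.1). A minimal sketch; the `messy` string is just an illustrative input:

    import isort

    messy = "import torch\nimport argparse\n"

    # check_code returns True when imports are already sorted;
    # code() returns the sorted source. profile="black" matches the
    # repo's pre-commit configuration.
    print(isort.check_code(messy, profile="black"))  # False: not sorted
    print(isort.code(messy, profile="black"))
    # import argparse
    #
    # import torch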

View File

@ -26,7 +26,7 @@ repos:
# E121,E123,E126,E226,E24,E704,W503,W504
- repo: https://github.com/pycqa/isort
rev: 5.11.5
rev: 5.10.1
hooks:
- id: isort
args: ["--profile=black"]
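The bulk of this commit is the output of running that hook. With --profile=black, isort groups imports into sections (standard library first, then third party, then first party/local, with exact section assignment depending on configuration) and sorts alphabetically within each section, one blank line between sections. A hypothetical before/after:

    # Before:
    import torch.nn as nn
    import argparse
    import torch

    # After isort --profile=black:
    import argparse

    import torch
    import torch.nn as nn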

View File

@ -79,10 +79,10 @@ It will generate the following 3 files inside $repo/exp:
import argparse
import logging
from icefall import is_module_available
import torch
from onnx_pretrained import OnnxModel
import torch
from icefall import is_module_available
def get_parser():
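icefall.is_module_available, re-grouped above, checks whether optional dependencies (here, the ONNX tooling) can be imported before the script relies on them. A plausible minimal implementation of the idea, not necessarily icefall's exact code:

    import importlib.util

    def is_module_available(*modules: str) -> bool:
        # True only if every named module can be found by the import system.
        return all(importlib.util.find_spec(m) is not None for m in modules)

    # Example: fail early with an actionable message.
    if not is_module_available("onnx"):
        raise ValueError("Please install onnx first: pip install onnx")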

View File

@ -70,9 +70,9 @@ import logging
from pathlib import Path
import torch
from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
from scaling_converter import convert_scaled_to_non_scaled
from tokenizer import Tokenizer
from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
from icefall.checkpoint import (
average_checkpoints,

View File

@ -23,6 +23,7 @@ from pathlib import Path
from lhotse import CutSet, SupervisionSegment
from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import str2bool
# Similar text filtering and normalization procedure as in:

View File

@ -76,6 +76,7 @@ from beam_search import (
)
from gigaspeech_scoring import asr_text_post_processing
from train import get_params, get_transducer_model
from icefall.checkpoint import (
average_checkpoints,
average_checkpoints_with_averaged_model,

View File

@ -88,7 +88,7 @@ import sentencepiece as spm
import torch
import torch.nn as nn
from asr_datamodule import GigaSpeechAsrDataModule
from train import add_model_arguments, get_params, get_model
from train import add_model_arguments, get_model, get_params
from icefall.checkpoint import (
average_checkpoints,

View File

@ -51,7 +51,7 @@ from streaming_beam_search import (
)
from torch import Tensor, nn
from torch.nn.utils.rnn import pad_sequence
from train import add_model_arguments, get_params, get_model
from train import add_model_arguments, get_model, get_params
from icefall.checkpoint import (
average_checkpoints,

View File

@ -42,12 +42,10 @@ import sentencepiece as spm
import torch
import torch.nn as nn
from asr_datamodule import GigaSpeechAsrDataModule
from beam_search import (
keywords_search,
)
from beam_search import keywords_search
from lhotse.cut import Cut
from train import add_model_arguments, get_model, get_params
from lhotse.cut import Cut
from icefall import ContextGraph
from icefall.checkpoint import (
average_checkpoints,

View File

@ -76,6 +76,20 @@ from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from train import (
add_model_arguments,
add_training_arguments,
compute_loss,
compute_validation_loss,
display_and_save_batch,
get_adjusted_batch_count,
get_model,
get_params,
load_checkpoint_if_available,
save_checkpoint,
scan_pessimistic_batches_for_oom,
set_batch_count,
)
from icefall import diagnostics
from icefall.checkpoint import remove_checkpoints
@ -95,21 +109,6 @@ from icefall.utils import (
str2bool,
)
from train import (
add_model_arguments,
add_training_arguments,
compute_loss,
compute_validation_loss,
display_and_save_batch,
get_adjusted_batch_count,
get_model,
get_params,
load_checkpoint_if_available,
save_checkpoint,
scan_pessimistic_batches_for_oom,
set_batch_count,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

View File

@ -24,8 +24,7 @@ To run this file, do:
"""
import torch
from train import get_params, get_ctc_model
from train import get_ctc_model, get_params
def test_model():

View File

@ -59,9 +59,9 @@ import onnx
import torch
import torch.nn as nn
from decoder import Decoder
from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
from emformer import Emformer
from scaling_converter import convert_scaled_to_non_scaled
from do_not_use_it_directly import add_model_arguments, get_params, get_transducer_model
from icefall.checkpoint import (
average_checkpoints,

View File

@ -39,7 +39,7 @@ Usage of this script:
import argparse
import logging
import math
from typing import List
from typing import List, Optional
import kaldifeat
import sentencepiece as spm
@ -47,7 +47,6 @@ import torch
import torchaudio
from kaldifeat import FbankOptions, OnlineFbank, OnlineFeature
from torch.nn.utils.rnn import pad_sequence
from typing import Optional, List
def get_parser():

View File

@ -31,28 +31,28 @@ https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stat
"""
import argparse
import torch.multiprocessing as mp
import torch
import torch.nn as nn
import logging
from concurrent.futures import ThreadPoolExecutor
from typing import List, Optional, Tuple
from pathlib import Path
from typing import List, Optional, Tuple
import k2
import sentencepiece as spm
import torch
import torch.multiprocessing as mp
import torch.nn as nn
from asr_datamodule import AsrDataModule
from beam_search import (
fast_beam_search_one_best,
greedy_search_batch,
modified_beam_search,
)
from icefall.utils import AttributeDict, convert_timestamp, setup_logger
from lhotse import CutSet, load_manifest_lazy
from lhotse.cut import Cut
from lhotse.supervision import AlignmentItem
from lhotse.serialization import SequentialJsonlWriter
from lhotse.supervision import AlignmentItem
from icefall.utils import AttributeDict, convert_timestamp, setup_logger
def get_parser():

View File

@ -73,12 +73,11 @@ It will generate the following 3 files inside $repo/exp:
import argparse
import logging
import torch
from onnx_pretrained import OnnxModel
from icefall import is_module_available
import torch
def get_parser():
parser = argparse.ArgumentParser(

View File

@ -22,11 +22,12 @@ Usage: ./pruned_transducer_stateless/my_profile.py
import argparse
import logging
import sentencepiece as spm
import torch
from train import add_model_arguments, get_encoder_model, get_params
from icefall.profiler import get_model_profile
from train import get_encoder_model, add_model_arguments, get_params
def get_parser():

View File

@ -75,8 +75,7 @@ import sentencepiece as spm
import torch
import torch.nn as nn
from asr_datamodule import LibriSpeechAsrDataModule
from onnx_pretrained import greedy_search, OnnxModel
from onnx_pretrained import OnnxModel, greedy_search
from icefall.utils import setup_logger, store_transcripts, write_error_stats

View File

@ -78,10 +78,10 @@ It will generate the following 3 files inside $repo/exp:
import argparse
import logging
from icefall import is_module_available
import torch
from onnx_pretrained import OnnxModel
import torch
from icefall import is_module_available
def get_parser():

View File

@ -76,8 +76,7 @@ import torch
import torch.nn as nn
from asr_datamodule import AsrDataModule
from librispeech import LibriSpeech
from onnx_pretrained import greedy_search, OnnxModel
from onnx_pretrained import OnnxModel, greedy_search
from icefall.utils import setup_logger, store_transcripts, write_error_stats

View File

@ -22,15 +22,15 @@ Usage: ./pruned_transducer_stateless4/my_profile.py
import argparse
import logging
from typing import Tuple
import sentencepiece as spm
import torch
from typing import Tuple
from scaling import BasicNorm, DoubleSwish
from torch import Tensor, nn
from train import add_model_arguments, get_encoder_model, get_joiner_model, get_params
from icefall.profiler import get_model_profile
from scaling import BasicNorm, DoubleSwish
from train import get_encoder_model, get_joiner_model, add_model_arguments, get_params
def get_parser():

View File

@ -82,8 +82,7 @@ import sentencepiece as spm
import torch
import torch.nn as nn
from asr_datamodule import LibriSpeechAsrDataModule
from onnx_pretrained import greedy_search, OnnxModel
from onnx_pretrained import OnnxModel, greedy_search
from icefall.utils import setup_logger, store_transcripts, write_error_stats

View File

@ -20,7 +20,6 @@ from typing import List
import k2
import torch
from beam_search import Hypothesis, HypothesisList, get_hyps_shape
# The force alignment problem can be formulated as finding

View File

@ -107,9 +107,6 @@ import k2
import sentencepiece as spm
import torch
import torch.nn as nn
# from asr_datamodule import LibriSpeechAsrDataModule
from gigaspeech import GigaSpeechAsrDataModule
from beam_search import (
beam_search,
fast_beam_search_nbest,
@ -120,6 +117,9 @@ from beam_search import (
greedy_search_batch,
modified_beam_search,
)
# from asr_datamodule import LibriSpeechAsrDataModule
from gigaspeech import GigaSpeechAsrDataModule
from gigaspeech_scoring import asr_text_post_processing
from train import add_model_arguments, get_params, get_transducer_model

View File

@ -65,16 +65,15 @@ from typing import Dict, List
import sentencepiece as spm
import torch
from train import add_model_arguments, get_params, get_transducer_model
from icefall.utils import str2bool
from icefall.checkpoint import (
average_checkpoints,
average_checkpoints_with_averaged_model,
find_checkpoints,
load_checkpoint,
)
from icefall.utils import str2bool
def get_parser():
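icefall.utils.str2bool, moved after the checkpoint imports above, is the usual argparse helper for boolean flags; it is needed because argparse's type=bool treats any non-empty string as True. A typical implementation along these lines (a sketch, not necessarily icefall's exact code):

    import argparse

    def str2bool(v: str) -> bool:
        # Accept common spellings so flags like --use-averaged-model=false work.
        if v.lower() in ("yes", "true", "t", "y", "1"):
            return True
        if v.lower() in ("no", "false", "f", "n", "0"):
            return False
        raise argparse.ArgumentTypeError(f"Boolean value expected, got {v!r}")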

View File

@ -22,15 +22,15 @@ Usage: ./pruned_transducer_stateless7/my_profile.py
import argparse
import logging
from typing import Tuple
import sentencepiece as spm
import torch
from typing import Tuple
from scaling import BasicNorm, DoubleSwish
from torch import Tensor, nn
from train import add_model_arguments, get_encoder_model, get_joiner_model, get_params
from icefall.profiler import get_model_profile
from scaling import BasicNorm, DoubleSwish
from train import get_encoder_model, get_joiner_model, add_model_arguments, get_params
def get_parser():

View File

@ -75,8 +75,7 @@ import sentencepiece as spm
import torch
import torch.nn as nn
from asr_datamodule import LibriSpeechAsrDataModule
from onnx_pretrained import greedy_search, OnnxModel
from onnx_pretrained import OnnxModel, greedy_search
from icefall.utils import setup_logger, store_transcripts, write_error_stats

View File

@ -24,7 +24,6 @@ To run this file, do:
"""
import torch
from scaling_converter import convert_scaled_to_non_scaled
from train import get_params, get_transducer_model

View File

@ -118,8 +118,8 @@ from beam_search import (
greedy_search_batch,
modified_beam_search,
)
from train import add_model_arguments, get_params, get_transducer_model
from torch.nn.utils.rnn import pad_sequence
from train import add_model_arguments, get_params, get_transducer_model
from icefall.checkpoint import (
average_checkpoints,

View File

@ -18,10 +18,7 @@ from typing import List, Optional, Tuple, Union
import torch
import torch.nn as nn
from scaling import (
ActivationBalancer,
ScaledConv1d,
)
from scaling import ActivationBalancer, ScaledConv1d
class LConv(nn.Module):

View File

@ -52,7 +52,7 @@ import onnxruntime as ort
import sentencepiece as spm
import torch
import torchaudio
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torch.nn.utils.rnn import pack_padded_sequence, pad_sequence
from icefall.utils import make_pad_mask

View File

@ -14,6 +14,7 @@
import torch
from torch import nn
from icefall.utils import make_pad_mask
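make_pad_mask, imported above, turns a batch's sequence lengths into a boolean padding mask; in icefall, True marks padded positions. A minimal sketch of the idea:

    import torch

    def make_pad_mask(lengths: torch.Tensor) -> torch.Tensor:
        # lengths: (batch,) int tensor of valid frame counts.
        # Returns a (batch, max_len) bool tensor, True at padded positions.
        max_len = int(lengths.max())
        idx = torch.arange(max_len, device=lengths.device)
        return idx.unsqueeze(0) >= lengths.unsqueeze(1)

    mask = make_pad_mask(torch.tensor([3, 1, 2]))
    # tensor([[False, False, False],
    #         [False,  True,  True],
    #         [False, False,  True]])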

View File

@ -4,7 +4,6 @@
import ncnn
import numpy as np
layer_list = []

View File

@ -42,7 +42,6 @@ import ncnn
import torch
import torchaudio
from kaldifeat import FbankOptions, OnlineFbank, OnlineFeature
from ncnn_custom_layer import RegisterCustomLayers

View File

@ -1,10 +1,11 @@
import argparse
import logging
import math
import pprint
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import pprint
import k2
import sentencepiece as spm
import torch

View File

@ -88,7 +88,7 @@ import sentencepiece as spm
import torch
import torch.nn as nn
from asr_datamodule import LibriSpeechAsrDataModule
from train import add_model_arguments, get_params, get_model
from train import add_model_arguments, get_model, get_params
from icefall.checkpoint import (
average_checkpoints,

View File

@ -22,9 +22,9 @@ import k2
import torch
import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos, make_pad_mask
from scaling import ScaledLinear
class AsrModel(nn.Module):

View File

@ -22,24 +22,24 @@ Usage: ./zipformer/my_profile.py
import argparse
import logging
from typing import Tuple
import sentencepiece as spm
import torch
from typing import Tuple
from torch import Tensor, nn
from icefall.utils import make_pad_mask
from icefall.profiler import get_model_profile
from scaling import BiasNorm
from torch import Tensor, nn
from train import (
add_model_arguments,
get_encoder_embed,
get_encoder_model,
get_joiner_model,
add_model_arguments,
get_params,
)
from zipformer import BypassModule
from icefall.profiler import get_model_profile
from icefall.utils import make_pad_mask
def get_parser():
parser = argparse.ArgumentParser(

View File

@ -77,11 +77,10 @@ from typing import List, Tuple
import torch
import torch.nn as nn
from asr_datamodule import LibriSpeechAsrDataModule
from onnx_pretrained import greedy_search, OnnxModel
from k2 import SymbolTable
from onnx_pretrained import OnnxModel, greedy_search
from icefall.utils import setup_logger, store_transcripts, write_error_stats
from k2 import SymbolTable
def get_parser():

View File

@ -27,11 +27,10 @@ https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02
import argparse
import logging
import math
from typing import List, Tuple
from typing import Dict, List, Tuple
import k2
import kaldifeat
from typing import Dict
import kaldifst
import onnxruntime as ort
import torch

View File

@ -27,11 +27,10 @@ https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02
import argparse
import logging
import math
from typing import List, Tuple
from typing import Dict, List, Tuple
import k2
import kaldifeat
from typing import Dict
import kaldifst
import onnxruntime as ort
import torch

View File

@ -27,11 +27,10 @@ https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02
import argparse
import logging
import math
from typing import List, Tuple
from typing import Dict, List, Tuple
import k2
import kaldifeat
from typing import Dict
import kaldifst
import onnxruntime as ort
import torch

View File

@ -15,15 +15,16 @@
# limitations under the License.
from typing import Optional, Tuple, Union
import logging
import k2
from torch.cuda.amp import custom_fwd, custom_bwd
import random
import torch
import math
import random
from typing import Optional, Tuple, Union
import k2
import torch
import torch.nn as nn
from torch import Tensor
from torch.cuda.amp import custom_bwd, custom_fwd
def logaddexp_onnx(x: Tensor, y: Tensor) -> Tensor:
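torch.logaddexp is not supported by some ONNX export paths, which is presumably why scaling.py defines its own logaddexp_onnx. A numerically stable formulation using only ONNX-friendly ops; a sketch under that assumption, not necessarily icefall's exact body:

    import torch
    from torch import Tensor

    def logaddexp_onnx(x: Tensor, y: Tensor) -> Tensor:
        # log(exp(x) + exp(y)) = max(x, y) + log1p(exp(-|x - y|))
        m = torch.max(x, y)
        return m + torch.log1p(torch.exp(-torch.abs(x - y)))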

View File

@ -51,7 +51,7 @@ from streaming_beam_search import (
)
from torch import Tensor, nn
from torch.nn.utils.rnn import pad_sequence
from train import add_model_arguments, get_params, get_model
from train import add_model_arguments, get_model, get_params
from icefall.checkpoint import (
average_checkpoints,

View File

@ -16,11 +16,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Tuple
import warnings
from typing import Tuple
import torch
from torch import Tensor, nn
from scaling import (
Balancer,
BiasNorm,
@ -34,6 +33,7 @@ from scaling import (
SwooshR,
Whiten,
)
from torch import Tensor, nn
class ConvNeXt(nn.Module):

View File

@ -858,7 +858,9 @@ def main():
logging.info("About to create model")
model = get_model(params)
import pdb; pdb.set_trace()
import pdb
pdb.set_trace()
if not params.use_averaged_model:
if params.iter > 0:
@ -877,9 +879,13 @@ def main():
)
logging.info(f"averaging {filenames}")
model.to(device)
model.load_state_dict(average_checkpoints(filenames, device=device), strict=False)
model.load_state_dict(
average_checkpoints(filenames, device=device), strict=False
)
elif params.avg == 1:
load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model, strict=False)
load_checkpoint(
f"{params.exp_dir}/epoch-{params.epoch}.pt", model, strict=False
)
else:
start = params.epoch - params.avg + 1
filenames = []
@ -888,7 +894,9 @@ def main():
filenames.append(f"{params.exp_dir}/epoch-{i}.pt")
logging.info(f"averaging {filenames}")
model.to(device)
model.load_state_dict(average_checkpoints(filenames, device=device), strict=False)
model.load_state_dict(
average_checkpoints(filenames, device=device), strict=False
)
else:
if params.iter > 0:
filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
@ -917,7 +925,7 @@ def main():
filename_end=filename_end,
device=device,
),
strict=False
strict=False,
)
else:
assert params.avg > 0, params.avg
@ -936,7 +944,7 @@ def main():
filename_end=filename_end,
device=device,
),
strict=False
strict=False,
)
model.to(device)
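The calls being re-wrapped by black here average several training checkpoints into a single model before decoding. Conceptually, average_checkpoints computes an element-wise mean over the saved "model" state dicts; a simplified sketch only (icefall's real implementation handles integer buffers and other bookkeeping):

    import torch

    def average_checkpoints_sketch(filenames, device="cpu"):
        avg = None
        for f in filenames:
            state = torch.load(f, map_location=device)["model"]
            if avg is None:
                avg = {k: v.clone().float() for k, v in state.items()}
            else:
                for k in avg:
                    avg[k] += state[k].float()
        # Element-wise mean over all checkpoints.
        return {k: v / len(filenames) for k, v in avg.items()}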

View File

@ -121,7 +121,7 @@ from beam_search import (
modified_beam_search_lm_shallow_fusion,
modified_beam_search_LODR,
)
from train import add_model_arguments, add_finetune_arguments, get_model, get_params
from train import add_finetune_arguments, add_model_arguments, get_model, get_params
from icefall import ContextGraph, LmScorer, NgramLm
from icefall.checkpoint import (

View File

@ -72,7 +72,7 @@ import torch.nn as nn
from decoder import Decoder
from onnxruntime.quantization import QuantType, quantize_dynamic
from scaling_converter import convert_scaled_to_non_scaled
from train import add_model_arguments, add_finetune_arguments, get_model, get_params
from train import add_finetune_arguments, add_model_arguments, get_model, get_params
from zipformer import Zipformer2
from icefall.checkpoint import (
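quantize_dynamic, imported above from onnxruntime, post-processes the exported ONNX file into a weights-only int8 model. A minimal usage sketch; the file names are hypothetical:

    from onnxruntime.quantization import QuantType, quantize_dynamic

    quantize_dynamic(
        model_input="model.onnx",        # hypothetical exported model
        model_output="model.int8.onnx",  # hypothetical quantized output
        weight_type=QuantType.QInt8,     # quantize weights to int8
    )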

View File

@ -77,11 +77,10 @@ from typing import List, Tuple
import torch
import torch.nn as nn
from asr_datamodule import LibriSpeechAsrDataModule
from onnx_pretrained import greedy_search, OnnxModel
from k2 import SymbolTable
from onnx_pretrained import OnnxModel, greedy_search
from icefall.utils import setup_logger, store_transcripts, write_error_stats
from k2 import SymbolTable
conversational_filler = [
"UH",
@ -182,6 +181,7 @@ def get_parser():
return parser
def post_processing(
results: List[Tuple[str, List[str], List[str]]],
) -> List[Tuple[str, List[str], List[str]]]:
@ -192,6 +192,7 @@ def post_processing(
new_results.append((key, new_ref, new_hyp))
return new_results
def decode_one_batch(
model: OnnxModel, token_table: SymbolTable, batch: dict
) -> List[List[str]]:

View File

@ -137,14 +137,14 @@ def add_finetune_arguments(parser: argparse.ArgumentParser):
"--use-adapters",
type=str2bool,
default=True,
help="If use adapter to finetune the model"
help="If use adapter to finetune the model",
)
parser.add_argument(
"--adapter-dim",
type=int,
default=16,
help="The bottleneck dimension of the adapter"
help="The bottleneck dimension of the adapter",
)
parser.add_argument(
@ -1273,7 +1273,11 @@ def run(rank, world_size, args):
else:
p.requires_grad = False
logging.info("A total of {} trainable parameters ({:.3f}% of the whole model)".format(num_trainable, num_trainable/num_param * 100))
logging.info(
"A total of {} trainable parameters ({:.3f}% of the whole model)".format(
num_trainable, num_trainable / num_param * 100
)
)
model.to(device)
if world_size > 1:
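This hunk reformats logging in the adapter fine-tuning script, where only small bottleneck modules (dimension --adapter-dim, default 16) are trained while the base model stays frozen. A generic sketch of such an adapter and of the trainable-parameter fraction being logged; the class below is illustrative, not icefall's exact module:

    import torch.nn as nn

    class Adapter(nn.Module):
        # Down-project, nonlinearity, up-project, residual connection.
        def __init__(self, d_model: int, adapter_dim: int = 16):
            super().__init__()
            self.down = nn.Linear(d_model, adapter_dim)
            self.act = nn.ReLU()
            self.up = nn.Linear(adapter_dim, d_model)

        def forward(self, x):
            return x + self.up(self.act(self.down(x)))

    def trainable_percent(model: nn.Module) -> float:
        num_param = sum(p.numel() for p in model.parameters())
        num_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        return num_trainable / num_param * 100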

View File

@ -40,13 +40,13 @@ from scaling import (
Dropout2,
FloatLike,
ScheduledFloat,
SwooshL,
SwooshR,
Whiten,
convert_num_channels,
limit_param_value,
penalize_abs_values_gt,
softmax,
SwooshL,
SwooshR,
)
from torch import Tensor, nn

View File

@ -1,3 +1,4 @@
See https://k2-fsa.github.io/icefall/recipes/TTS/ljspeech/vits.html for detailed tutorials.
Training logs, Tensorboard logs, and checkpoints are uploaded to https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2023-11-29.
Training logs, Tensorboard logs, and checkpoints are uploaded to
https://huggingface.co/Zengwei/icefall-tts-ljspeech-vits-2024-02-28

View File

@ -91,7 +91,7 @@ def add_meta_data(filename: str, meta_data: Dict[str, str]):
for key, value in meta_data.items():
meta = model.metadata_props.add()
meta.key = key
meta.value = value
meta.value = str(value)
onnx.save(model, filename)
@ -199,10 +199,15 @@ def export_model_onnx(
)
meta_data = {
"model_type": "VITS",
"model_type": "vits",
"version": "1",
"model_author": "k2-fsa",
"comment": "VITS generator",
"comment": "icefall", # must be icefall for models from icefall
"language": "English",
"voice": "en-us", # Choose your language appropriately
"has_espeak": 1,
"n_speakers": 1,
"sample_rate": 22050, # Must match the real sample rate
}
logging.info(f"meta_data: {meta_data}")
@ -268,3 +273,144 @@ if __name__ == "__main__":
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
logging.basicConfig(format=formatter, level=logging.INFO)
main()
"""
Supported languages.
LJSpeech is using "en-us" from the second column.
Pty Language Age/Gender VoiceName File Other Languages
5 af --/M Afrikaans gmw/af
5 am --/M Amharic sem/am
5 an --/M Aragonese roa/an
5 ar --/M Arabic sem/ar
5 as --/M Assamese inc/as
5 az --/M Azerbaijani trk/az
5 ba --/M Bashkir trk/ba
5 be --/M Belarusian zle/be
5 bg --/M Bulgarian zls/bg
5 bn --/M Bengali inc/bn
5 bpy --/M Bishnupriya_Manipuri inc/bpy
5 bs --/M Bosnian zls/bs
5 ca --/M Catalan roa/ca
5 chr-US-Qaaa-x-west --/M Cherokee_ iro/chr
5 cmn --/M Chinese_(Mandarin,_latin_as_English) sit/cmn (zh-cmn 5)(zh 5)
5 cmn-latn-pinyin --/M Chinese_(Mandarin,_latin_as_Pinyin) sit/cmn-Latn-pinyin (zh-cmn 5)(zh 5)
5 cs --/M Czech zlw/cs
5 cv --/M Chuvash trk/cv
5 cy --/M Welsh cel/cy
5 da --/M Danish gmq/da
5 de --/M German gmw/de
5 el --/M Greek grk/el
5 en-029 --/M English_(Caribbean) gmw/en-029 (en 10)
2 en-gb --/M English_(Great_Britain) gmw/en (en 2)
5 en-gb-scotland --/M English_(Scotland) gmw/en-GB-scotland (en 4)
5 en-gb-x-gbclan --/M English_(Lancaster) gmw/en-GB-x-gbclan (en-gb 3)(en 5)
5 en-gb-x-gbcwmd --/M English_(West_Midlands) gmw/en-GB-x-gbcwmd (en-gb 9)(en 9)
5 en-gb-x-rp --/M English_(Received_Pronunciation) gmw/en-GB-x-rp (en-gb 4)(en 5)
2 en-us --/M English_(America) gmw/en-US (en 3)
5 en-us-nyc --/M English_(America,_New_York_City) gmw/en-US-nyc
5 eo --/M Esperanto art/eo
5 es --/M Spanish_(Spain) roa/es
5 es-419 --/M Spanish_(Latin_America) roa/es-419 (es-mx 6)
5 et --/M Estonian urj/et
5 eu --/M Basque eu
5 fa --/M Persian ira/fa
5 fa-latn --/M Persian_(Pinglish) ira/fa-Latn
5 fi --/M Finnish urj/fi
5 fr-be --/M French_(Belgium) roa/fr-BE (fr 8)
5 fr-ch --/M French_(Switzerland) roa/fr-CH (fr 8)
5 fr-fr --/M French_(France) roa/fr (fr 5)
5 ga --/M Gaelic_(Irish) cel/ga
5 gd --/M Gaelic_(Scottish) cel/gd
5 gn --/M Guarani sai/gn
5 grc --/M Greek_(Ancient) grk/grc
5 gu --/M Gujarati inc/gu
5 hak --/M Hakka_Chinese sit/hak
5 haw --/M Hawaiian map/haw
5 he --/M Hebrew sem/he
5 hi --/M Hindi inc/hi
5 hr --/M Croatian zls/hr (hbs 5)
5 ht --/M Haitian_Creole roa/ht
5 hu --/M Hungarian urj/hu
5 hy --/M Armenian_(East_Armenia) ine/hy (hy-arevela 5)
5 hyw --/M Armenian_(West_Armenia) ine/hyw (hy-arevmda 5)(hy 8)
5 ia --/M Interlingua art/ia
5 id --/M Indonesian poz/id
5 io --/M Ido art/io
5 is --/M Icelandic gmq/is
5 it --/M Italian roa/it
5 ja --/M Japanese jpx/ja
5 jbo --/M Lojban art/jbo
5 ka --/M Georgian ccs/ka
5 kk --/M Kazakh trk/kk
5 kl --/M Greenlandic esx/kl
5 kn --/M Kannada dra/kn
5 ko --/M Korean ko
5 kok --/M Konkani inc/kok
5 ku --/M Kurdish ira/ku
5 ky --/M Kyrgyz trk/ky
5 la --/M Latin itc/la
5 lb --/M Luxembourgish gmw/lb
5 lfn --/M Lingua_Franca_Nova art/lfn
5 lt --/M Lithuanian bat/lt
5 ltg --/M Latgalian bat/ltg
5 lv --/M Latvian bat/lv
5 mi --/M Māori poz/mi
5 mk --/M Macedonian zls/mk
5 ml --/M Malayalam dra/ml
5 mr --/M Marathi inc/mr
5 ms --/M Malay poz/ms
5 mt --/M Maltese sem/mt
5 mto --/M Totontepec_Mixe miz/mto
5 my --/M Myanmar_(Burmese) sit/my
5 nb --/M Norwegian_Bokmål gmq/nb (no 5)
5 nci --/M Nahuatl_(Classical) azc/nci
5 ne --/M Nepali inc/ne
5 nl --/M Dutch gmw/nl
5 nog --/M Nogai trk/nog
5 om --/M Oromo cus/om
5 or --/M Oriya inc/or
5 pa --/M Punjabi inc/pa
5 pap --/M Papiamento roa/pap
5 piqd --/M Klingon art/piqd
5 pl --/M Polish zlw/pl
5 pt --/M Portuguese_(Portugal) roa/pt (pt-pt 5)
5 pt-br --/M Portuguese_(Brazil) roa/pt-BR (pt 6)
5 py --/M Pyash art/py
5 qdb --/M Lang_Belta art/qdb
5 qu --/M Quechua qu
5 quc --/M K'iche' myn/quc
5 qya --/M Quenya art/qya
5 ro --/M Romanian roa/ro
5 ru --/M Russian zle/ru
5 ru-cl --/M Russian_(Classic) zle/ru-cl
2 ru-lv --/M Russian_(Latvia) zle/ru-LV
5 sd --/M Sindhi inc/sd
5 shn --/M Shan_(Tai_Yai) tai/shn
5 si --/M Sinhala inc/si
5 sjn --/M Sindarin art/sjn
5 sk --/M Slovak zlw/sk
5 sl --/M Slovenian zls/sl
5 smj --/M Lule_Saami urj/smj
5 sq --/M Albanian ine/sq
5 sr --/M Serbian zls/sr
5 sv --/M Swedish gmq/sv
5 sw --/M Swahili bnt/sw
5 ta --/M Tamil dra/ta
5 te --/M Telugu dra/te
5 th --/M Thai tai/th
5 tk --/M Turkmen trk/tk
5 tn --/M Setswana bnt/tn
5 tr --/M Turkish trk/tr
5 tt --/M Tatar trk/tt
5 ug --/M Uyghur trk/ug
5 uk --/M Ukrainian zle/uk
5 ur --/M Urdu inc/ur
5 uz --/M Uzbek trk/uz
5 vi --/M Vietnamese_(Northern) aav/vi
5 vi-vn-x-central --/M Vietnamese_(Central) aav/vi-VN-x-central
5 vi-vn-x-south --/M Vietnamese_(Southern) aav/vi-VN-x-south
5 yue --/M Chinese_(Cantonese) sit/yue (zh-yue 5)(zh 8)
5 yue --/M Chinese_(Cantonese,_latin_as_Jyutping) sit/yue-Latn-jyutping (zh-yue 5)(zh 8)
"""

View File

@ -5,9 +5,9 @@ This file prints the text field of supervisions from cutset to the console
"""
import argparse
from pathlib import Path
from lhotse import load_manifest_lazy
from pathlib import Path
def get_args():

View File

@ -5,7 +5,6 @@ This file generates words.txt from the given transcript file.
"""
import argparse
from pathlib import Path

View File

@ -29,7 +29,6 @@ import torch
import torch.nn as nn
from asr_datamodule import SwitchBoardAsrDataModule
from conformer import Conformer
from sclite_scoring import asr_text_post_processing
from icefall.bpe_graph_compiler import BpeCtcTrainingGraphCompiler

View File

@ -16,8 +16,8 @@
# limitations under the License.
import argparse
from pathlib import Path
import logging
from pathlib import Path
from typing import List

View File

@ -45,6 +45,7 @@ import sentencepiece as spm
import torch
import torchaudio
from torch.nn.utils.rnn import pad_sequence
from icefall import smart_byte_decode

View File

@ -19,9 +19,9 @@ import k2
import torch
import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos, make_pad_mask
from scaling import ScaledLinear
class Transducer(nn.Module):

View File

@ -17,10 +17,10 @@
import argparse
import logging
import torch
import lhotse
from pathlib import Path
import lhotse
import torch
from lhotse import (
CutSet,
Fbank,
@ -29,6 +29,7 @@ from lhotse import (
fix_manifests,
validate_recordings_and_supervisions,
)
from icefall.utils import get_executor, str2bool
# Torch's multithreaded behavior needs to be disabled or

View File

@ -41,6 +41,7 @@ from prepare_lang import (
write_lexicon,
write_mapping,
)
from icefall.utils import text_to_pinyin
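text_to_pinyin converts Chinese text into pinyin tokens for lexicon preparation; requirements.txt (see the last hunk) pins pypinyin==0.50.0, which it presumably builds on. A minimal illustration of the underlying library, not icefall's exact wrapper:

    from pypinyin import Style, lazy_pinyin

    # Numeric tone marks, e.g. "你好" -> ['ni3', 'hao3']
    print(lazy_pinyin("你好", style=Style.TONE3))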

View File

@ -74,10 +74,10 @@ It will generate the following 3 files inside $repo/exp:
import argparse
import logging
from icefall import is_module_available
import torch
from onnx_pretrained import OnnxModel
import torch
from icefall import is_module_available
def get_parser():

View File

@ -30,9 +30,7 @@ import k2
import torch
import torch.nn as nn
from asr_datamodule import WenetSpeechAsrDataModule
from beam_search import (
keywords_search,
)
from beam_search import keywords_search
from lhotse.cut import Cut
from train import add_model_arguments, get_model, get_params

View File

@ -87,6 +87,19 @@ from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from train import (
add_model_arguments,
add_training_arguments,
compute_validation_loss,
display_and_save_batch,
get_adjusted_batch_count,
get_model,
get_params,
load_checkpoint_if_available,
save_checkpoint,
scan_pessimistic_batches_for_oom,
set_batch_count,
)
from icefall import diagnostics
from icefall.char_graph_compiler import CharCtcTrainingGraphCompiler
@ -109,21 +122,6 @@ from icefall.utils import (
text_to_pinyin,
)
from train import (
add_model_arguments,
add_training_arguments,
compute_validation_loss,
display_and_save_batch,
get_adjusted_batch_count,
get_model,
get_params,
load_checkpoint_if_available,
save_checkpoint,
scan_pessimistic_batches_for_oom,
set_batch_count,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

View File

@ -99,7 +99,6 @@ from icefall.utils import (
text_to_pinyin,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]

View File

@ -18,9 +18,8 @@ you can use ./export.py --jit 1
import argparse
import logging
from typing import List
import math
from typing import List
import k2
import kaldifeat

View File

@ -8,7 +8,6 @@
import re
import unicodedata
WHITESPACE_NORMALIZER = re.compile(r"\s+")
SPACE = chr(32)
SPACE_ESCAPE = chr(9601)

View File

@ -8,12 +8,12 @@ The lang_dir should contain the following files:
"""
import math
import re
from collections import defaultdict
from pathlib import Path
from typing import List, Tuple
import kaldifst
import re
class Lexicon:

View File

@ -18,7 +18,7 @@
import random
from dataclasses import dataclass
from typing import Optional, Tuple, List
from typing import List, Optional, Tuple
import torch
from torch import Tensor, nn

View File

@ -5,14 +5,15 @@
# This is modified from https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/profiling/flops_profiler/profiler.py
from collections import OrderedDict
from functools import partial
from typing import List, Optional
import k2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial
from typing import List, Optional
from collections import OrderedDict
import numpy as np
Tensor = torch.Tensor

View File

@ -5,16 +5,16 @@
import argparse
import logging
from pathlib import Path
from typing import Dict
import onnx
import torch
from model import RnnLmModel
from onnxruntime.quantization import QuantType, quantize_dynamic
from train import get_params
from icefall.checkpoint import average_checkpoints, find_checkpoints, load_checkpoint
from icefall.utils import AttributeDict, str2bool
from typing import Dict
from train import get_params
def add_meta_data(filename: str, meta_data: Dict[str, str]):

View File

@ -8,6 +8,10 @@ pypinyin==0.50.0
tensorboard
typeguard
dill
black==22.3.0
onnx==1.15.0
onnxruntime==1.16.3
# style check session:
black==22.3.0
isort==5.10.1
flake8==5.0.4