From 3d1a9680a8ac8ceef4fb3afe961c05ce804aa3b9 Mon Sep 17 00:00:00 2001 From: jinzr Date: Mon, 4 Mar 2024 14:21:21 +0800 Subject: [PATCH] updated --- egs/multi_zh_en/ASR/prepare.sh | 3 --- icefall/utils.py | 12 +++++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/egs/multi_zh_en/ASR/prepare.sh b/egs/multi_zh_en/ASR/prepare.sh index 46ff17083..a1530be29 100755 --- a/egs/multi_zh_en/ASR/prepare.sh +++ b/egs/multi_zh_en/ASR/prepare.sh @@ -115,9 +115,6 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then cat ./data/lang_bpe_500/transcript_words.txt \ >> $lang_dir/text_words_segmentation - - cat ./data/lang_char/text_words_segmentation \ - >> $lang_dir/text_words_segmentation fi cat $lang_dir/text_words_segmentation | sed 's/ /\n/g' \ diff --git a/icefall/utils.py b/icefall/utils.py index 7d722b1bc..2cb2edf93 100644 --- a/icefall/utils.py +++ b/icefall/utils.py @@ -28,8 +28,6 @@ from contextlib import contextmanager from dataclasses import dataclass from datetime import datetime from pathlib import Path -from pypinyin import pinyin, lazy_pinyin -from pypinyin.contrib.tone_convert import to_initials, to_finals_tone, to_finals from shutil import copyfile from typing import Dict, Iterable, List, Optional, TextIO, Tuple, Union @@ -40,6 +38,8 @@ import sentencepiece as spm import torch import torch.distributed as dist import torch.nn as nn +from pypinyin import lazy_pinyin, pinyin +from pypinyin.contrib.tone_convert import to_finals, to_finals_tone, to_initials from torch.utils.tensorboard import SummaryWriter from icefall.checkpoint import average_checkpoints @@ -1081,9 +1081,11 @@ def write_surt_error_stats( f"{cut_id}:\t" + " ".join( ( - ref_word - if ref_word == hyp_word - else f"({ref_word}->{hyp_word})" + ( + ref_word + if ref_word == hyp_word + else f"({ref_word}->{hyp_word})" + ) for ref_word, hyp_word in ali ) ),