This commit is contained in:
jinzr 2024-03-04 14:21:21 +08:00
parent dcd947d692
commit 3d1a9680a8
2 changed files with 7 additions and 8 deletions

View File

@@ -115,9 +115,6 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
cat ./data/lang_bpe_500/transcript_words.txt \
>> $lang_dir/text_words_segmentation
cat ./data/lang_char/text_words_segmentation \
>> $lang_dir/text_words_segmentation
fi
cat $lang_dir/text_words_segmentation | sed 's/ /\n/g' \

View File

@@ -28,8 +28,6 @@ from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from pypinyin import pinyin, lazy_pinyin
from pypinyin.contrib.tone_convert import to_initials, to_finals_tone, to_finals
from shutil import copyfile
from typing import Dict, Iterable, List, Optional, TextIO, Tuple, Union
@@ -40,6 +38,8 @@ import sentencepiece as spm
import torch
import torch.distributed as dist
import torch.nn as nn
from pypinyin import lazy_pinyin, pinyin
from pypinyin.contrib.tone_convert import to_finals, to_finals_tone, to_initials
from torch.utils.tensorboard import SummaryWriter
from icefall.checkpoint import average_checkpoints
@@ -1080,10 +1080,12 @@ def write_surt_error_stats(
print(
f"{cut_id}:\t"
+ " ".join(
(
(
ref_word
if ref_word == hyp_word
else f"({ref_word}->{hyp_word})"
)
for ref_word, hyp_word in ali
)
),