This commit is contained in:
jinzr 2024-03-04 14:21:21 +08:00
parent dcd947d692
commit 3d1a9680a8
2 changed files with 7 additions and 8 deletions

View File

@ -115,9 +115,6 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
cat ./data/lang_bpe_500/transcript_words.txt \ cat ./data/lang_bpe_500/transcript_words.txt \
>> $lang_dir/text_words_segmentation >> $lang_dir/text_words_segmentation
cat ./data/lang_char/text_words_segmentation \
>> $lang_dir/text_words_segmentation
fi fi
cat $lang_dir/text_words_segmentation | sed 's/ /\n/g' \ cat $lang_dir/text_words_segmentation | sed 's/ /\n/g' \

View File

@ -28,8 +28,6 @@ from contextlib import contextmanager
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from pypinyin import pinyin, lazy_pinyin
from pypinyin.contrib.tone_convert import to_initials, to_finals_tone, to_finals
from shutil import copyfile from shutil import copyfile
from typing import Dict, Iterable, List, Optional, TextIO, Tuple, Union from typing import Dict, Iterable, List, Optional, TextIO, Tuple, Union
@ -40,6 +38,8 @@ import sentencepiece as spm
import torch import torch
import torch.distributed as dist import torch.distributed as dist
import torch.nn as nn import torch.nn as nn
from pypinyin import lazy_pinyin, pinyin
from pypinyin.contrib.tone_convert import to_finals, to_finals_tone, to_initials
from torch.utils.tensorboard import SummaryWriter from torch.utils.tensorboard import SummaryWriter
from icefall.checkpoint import average_checkpoints from icefall.checkpoint import average_checkpoints
@ -1080,10 +1080,12 @@ def write_surt_error_stats(
print( print(
f"{cut_id}:\t" f"{cut_id}:\t"
+ " ".join( + " ".join(
(
( (
ref_word ref_word
if ref_word == hyp_word if ref_word == hyp_word
else f"({ref_word}->{hyp_word})" else f"({ref_word}->{hyp_word})"
)
for ref_word, hyp_word in ali for ref_word, hyp_word in ali
) )
), ),