Mirror of https://github.com/k2-fsa/icefall.git

Commit c25dc02d5d (parent bfae73cb74)

add lexicon
40  .github/scripts/aishell3/TTS/run.sh  (vendored)
@@ -39,6 +39,13 @@ function prepare_data() {
   echo "------------------------------"
   wc -l data/tokens.txt
   echo "------------------------------"
+
+  echo "----------lexicon.txt----------"
+  head data/lexicon.txt
+  echo "----"
+  tail data/lexicon.txt
+  echo "----"
+  wc -l data/lexicon.txt
 }

 function train() {
@@ -47,7 +54,8 @@ function train() {
   git diff .
   popd

-  for t in low medium high; do
+  # for t in low medium high; do
+  for t in low; do
     ./vits/train.py \
       --exp-dir vits/exp-$t \
       --model-type $t \
@@ -62,12 +70,13 @@ function train() {
 }

 function export_onnx() {
-  for t in low medium high; do
+  # for t in low medium high; do
+  for t in low; do
     ./vits/export-onnx.py \
       --model-type $t \
       --epoch 1 \
       --exp-dir ./vits/exp-$t \
-      --tokens data/tokens.txt
+      --tokens data/tokens.txt \
       --speakers ./data/speakers.txt

     ls -lh vits/exp-$t/
@@ -75,7 +84,30 @@ function export_onnx() {
 }

 function test_low() {
-  echo "TODO"
+  git clone https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06
+  repo=icefall-tts-aishell3-vits-low-2024-04-06
+
+  ./vits/export-onnx.py \
+    --model-type low \
+    --epoch 1000 \
+    --exp-dir $repo/exp \
+    --tokens $repo/data/tokens.txt \
+    --speakers $repo/data/speakers.txt
+
+  ls -lh $repo/exp/vits-epoch-1000.onnx
+
+  python3 -m pip install sherpa-onnx
+
+  sherpa-onnx-offline-tts \
+    --vits-model=$repo/exp/vits-epoch-960.onnx \
+    --vits-tokens=$repo/data/tokens.txt \
+    --vits-lexicon=$repo/data/lexicon.txt \
+    --num-threads=1 \
+    --vits-length-scale=1.0 \
+    --sid=33 \
+    --output-filename=/icefall/low.wav \
+    --debug=1 \
+    "这是一个语音合成测试"
 }

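As a side note, the sherpa-onnx-offline-tts command in test_low() also has a Python counterpart. The sketch below mirrors the flags used above; it is not part of this commit, and the class and field names are taken from sherpa-onnx's published Python examples, so verify them against the installed release:

#!/usr/bin/env python3
# Sketch only: the Python analogue of the sherpa-onnx-offline-tts call in
# test_low(). Class/field names follow sherpa-onnx's Python examples and
# should be checked against the installed version.
import sherpa_onnx
import soundfile as sf

repo = "icefall-tts-aishell3-vits-low-2024-04-06"
config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(
            model=f"{repo}/exp/vits-epoch-960.onnx",
            lexicon=f"{repo}/data/lexicon.txt",
            tokens=f"{repo}/data/tokens.txt",
        ),
        num_threads=1,
        debug=True,
    ),
)
tts = sherpa_onnx.OfflineTts(config)
# speed is the inverse of --vits-length-scale, so 1.0 leaves timing unchanged.
audio = tts.generate("这是一个语音合成测试", sid=33, speed=1.0)
sf.write("low.wav", audio.samples, samplerate=audio.sample_rate)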
13  .github/workflows/aishell3.yml  (vendored)
@@ -1,4 +1,4 @@
-name: aishell
+name: aishell3

 on:
   push:
@@ -71,3 +71,14 @@ jobs:
             git config --global --add safe.directory /icefall

             .github/scripts/aishell3/TTS/run.sh
+
+      - name: display files
+        shell: bash
+        run: |
+          ls -lh
+
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0'
+        with:
+          name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
+          path: ./*.wav
68  egs/aishell3/TTS/local/generate_lexicon.py  (new executable file)
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+"""
+This file generates the file lexicon.txt that contains pronunciations of all
+words and phrases
+"""
+
+from pypinyin import phrases_dict, pinyin_dict
+from tokenizer import Tokenizer
+
+import argparse
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "--tokens",
+        type=str,
+        default="data/tokens.txt",
+        help="""Path to vocabulary.""",
+    )
+
+    parser.add_argument(
+        "--lexicon",
+        type=str,
+        default="data/lexicon.txt",
+        help="""Path to save the generated lexicon.""",
+    )
+    return parser
+
+
+def main():
+    args = get_parser().parse_args()
+    filename = args.lexicon
+    tokens = args.tokens
+    tokenizer = Tokenizer(tokens)
+
+    word_dict = pinyin_dict.pinyin_dict
+    phrases = phrases_dict.phrases_dict
+
+    i = 0
+    with open(filename, "w", encoding="utf-8") as f:
+        for key in word_dict:
+            if not (0x4E00 <= key <= 0x9FFF):
+                continue
+
+            w = chr(key)
+
+            # 1 to remove the initial sil
+            # :-1 to remove the final eos
+            tokens = tokenizer.text_to_tokens(w)[1:-1]
+
+            tokens = " ".join(tokens)
+            f.write(f"{w} {tokens}\n")
+
+        # TODO(fangjun): Add phrases
+        # for key in phrases:
+        #     # 1 to remove the initial sil
+        #     # :-1 to remove the final eos
+        #     tokens = tokenizer.text_to_tokens(key)[1:-1]
+        #     tokens = " ".join(tokens)
+        #     f.write(f"{key} {tokens}\n")
+
+
+if __name__ == "__main__":
+    main()
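Two details of generate_lexicon.py are worth spelling out. pypinyin's pinyin_dict is keyed by Unicode code points, and 0x4E00..0x9FFF is the CJK Unified Ideographs block, so the filter keeps exactly the single-character Chinese entries; each surviving entry is written as one "<word> <token> <token> ..." line. A quick illustration of the filter (not part of the commit):

# "你" is U+4F60, inside the CJK Unified Ideographs block, so it is kept;
# a Latin letter falls outside the range and is skipped.
assert ord("你") == 0x4F60
assert 0x4E00 <= ord("你") <= 0x9FFF
assert not (0x4E00 <= ord("a") <= 0x9FFF)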
egs/aishell3/TTS/local/prepare_token_file.py
@@ -17,7 +17,7 @@


 """
-This file generates the file that maps tokens to IDs.
+This file generates the file tokens.txt that maps tokens to IDs.
 """

 import argparse
egs/aishell3/TTS/prepare.sh
@@ -121,10 +121,14 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
 fi

 if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
-  log "Stage 6: Generate token file"
+  log "Stage 6: Generate tokens.txt and lexicon.txt"
   if [ ! -e data/tokens.txt ]; then
     ./local/prepare_token_file.py --tokens data/tokens.txt
   fi
+
+  if [ ! -e data/lexicon.txt ]; then
+    ./local/generate_lexicon.py --tokens data/tokens.txt --lexicon data/lexicon.txt
+  fi
 fi

 if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
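Since stage 6 now produces both files, one useful invariant is that every pronunciation token written to data/lexicon.txt also appears in data/tokens.txt. A hedged sanity-check sketch (not part of the commit), assuming the usual icefall layout of one "<token> <id>" pair per line in tokens.txt and no whitespace inside tokens:

# Verify that lexicon.txt only uses tokens that tokens.txt knows about.
with open("data/tokens.txt", encoding="utf-8") as f:
    vocab = {line.split()[0] for line in f if line.strip()}

with open("data/lexicon.txt", encoding="utf-8") as f:
    for line in f:
        word, *prons = line.split()
        missing = [p for p in prons if p not in vocab]
        assert not missing, f"{word}: unknown tokens {missing}"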
@@ -84,7 +84,7 @@ def get_parser():
     parser.add_argument(
         "--model-type",
         type=str,
-        default="medium",
+        default="low",
         choices=["low", "medium", "high"],
         help="""If not empty, valid values are: low, medium, high.
         It controls the model size. low -> runs faster.
@@ -156,7 +156,7 @@ def get_parser():
     parser.add_argument(
         "--model-type",
         type=str,
-        default="medium",
+        default="low",
         choices=["low", "medium", "high"],
         help="""If not empty, valid values are: low, medium, high.
         It controls the model size. low -> runs faster.