add lexicon

This commit is contained in:
Fangjun Kuang 2024-04-06 23:27:23 +08:00
parent bfae73cb74
commit c25dc02d5d
7 changed files with 124 additions and 9 deletions

View File

@@ -39,6 +39,13 @@ function prepare_data() {
echo "------------------------------"
wc -l data/tokens.txt
echo "------------------------------"
echo "----------lexicon.txt----------"
head data/lexicon.txt
echo "----"
tail data/lexicon.txt
echo "----"
wc -l data/lexicon.txt
}
function train() {
@@ -47,7 +54,8 @@ function train() {
git diff .
popd
for t in low medium high; do
# for t in low medium high; do
for t in low; do
./vits/train.py \
--exp-dir vits/exp-$t \
--model-type $t \
@@ -62,12 +70,13 @@ function train() {
}
function export_onnx() {
for t in low medium high; do
# for t in low medium high; do
for t in low; do
./vits/export-onnx.py \
--model-type $t \
--epoch 1 \
--exp-dir ./vits/exp-$t \
--tokens data/tokens.txt
--tokens data/tokens.txt \
--speakers ./data/speakers.txt
ls -lh vits/exp-$t/
@@ -75,7 +84,30 @@ function export_onnx() {
}
function test_low() {
echo "TODO"
git clone https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06
repo=icefall-tts-aishell3-vits-low-2024-04-06
./vits/export-onnx.py \
--model-type low \
--epoch 1000 \
--exp-dir $repo/exp \
--tokens $repo/data/tokens.txt \
--speakers $repo/data/speakers.txt
ls -lh $repo/exp/vits-epoch-1000.onnx
python3 -m pip install sherpa-onnx
sherpa-onnx-offline-tts \
--vits-model=$repo/exp/vits-epoch-960.onnx \
--vits-tokens=$repo/data/tokens.txt \
--vits-lexicon=$repo/data/lexicon.txt \
--num-threads=1 \
--vits-length-scale=1.0 \
--sid=33 \
--output-filename=/icefall/low.wav \
--debug=1 \
"这是一个语音合成测试"
}

View File

@@ -1,4 +1,4 @@
name: aishell
name: aishell3
on:
push:
@@ -71,3 +71,14 @@ jobs:
git config --global --add safe.directory /icefall
.github/scripts/aishell3/TTS/run.sh
- name: display files
shell: bash
run: |
ls -lh
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0'
with:
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
path: ./*.wav

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""
This file generates the file lexicon.txt that contains pronunciations of all
words and phrases
"""
from pypinyin import phrases_dict, pinyin_dict
from tokenizer import Tokenizer
import argparse
def get_parser():
    """Build the command-line parser for the lexicon generator.

    Returns:
      An ``argparse.ArgumentParser`` exposing ``--tokens`` (path to the
      input vocabulary) and ``--lexicon`` (path of the output file),
      both string-valued with defaults under ``data/``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # (flag, default, help) triples — all options are plain strings.
    option_specs = (
        ("--tokens", "data/tokens.txt", """Path to vocabulary."""),
        ("--lexicon", "data/lexicon.txt", """Path to save the generated lexicon."""),
    )
    for flag, default_value, help_text in option_specs:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)

    return parser
def main():
    """Write a lexicon file mapping each CJK character to its token sequence.

    Reads the vocabulary given by ``--tokens``, then for every single-character
    entry of pypinyin's ``pinyin_dict`` that lies in the CJK Unified Ideographs
    block (U+4E00..U+9FFF) writes one line ``<char> <tok1> <tok2> ...`` to the
    file given by ``--lexicon``.
    """
    args = get_parser().parse_args()
    filename = args.lexicon
    tokenizer = Tokenizer(args.tokens)

    word_dict = pinyin_dict.pinyin_dict

    with open(filename, "w", encoding="utf-8") as f:
        for codepoint in word_dict:
            # Keep only code points inside the CJK Unified Ideographs block.
            if not (0x4E00 <= codepoint <= 0x9FFF):
                continue
            w = chr(codepoint)

            # [1:-1] removes the initial sil token and the final eos token
            # that text_to_tokens() wraps around the pronunciation.
            tokens = tokenizer.text_to_tokens(w)[1:-1]
            f.write(f"{w} {' '.join(tokens)}\n")

        # TODO(fangjun): Add multi-character entries from
        # phrases_dict.phrases_dict, e.g.:
        # for phrase in phrases_dict.phrases_dict:
        #     tokens = tokenizer.text_to_tokens(phrase)[1:-1]
        #     f.write(f"{phrase} {' '.join(tokens)}\n")


if __name__ == "__main__":
    main()

View File

@@ -17,7 +17,7 @@
"""
This file generates the file that maps tokens to IDs.
This file generates the file tokens.txt that maps tokens to IDs.
"""
import argparse

View File

@@ -121,10 +121,14 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
fi
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
log "Stage 6: Generate token file"
log "Stage 6: Generate tokens.txt and lexicon.txt "
if [ ! -e data/tokens.txt ]; then
./local/prepare_token_file.py --tokens data/tokens.txt
fi
if [ ! -e data/lexicon.txt ]; then
./local/generate_lexicon.py --tokens data/tokens.txt --lexicon data/lexicon.txt
fi
fi
if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then

View File

@@ -84,7 +84,7 @@ def get_parser():
parser.add_argument(
"--model-type",
type=str,
default="medium",
default="low",
choices=["low", "medium", "high"],
help="""If not empty, valid values are: low, medium, high.
It controls the model size. low -> runs faster.

View File

@@ -156,7 +156,7 @@ def get_parser():
parser.add_argument(
"--model-type",
type=str,
default="medium",
default="low",
choices=["low", "medium", "high"],
help="""If not empty, valid values are: low, medium, high.
It controls the model size. low -> runs faster.