From 5c8e9628cc39b9fb1e471d53df9aec06b2602b97 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 13 Jan 2023 15:21:29 +0800 Subject: [PATCH] update faq for libpython3.10.so not found (#838) --- docs/source/conf.py | 3 + docs/source/faqs.rst | 40 ++++++++++++ .../librispeech/distillation.rst | 65 ++++++++++--------- .../pruned_transducer_stateless.rst | 2 + 4 files changed, 79 insertions(+), 31 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 33429f74c..ef9fe1445 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -81,6 +81,9 @@ todo_include_todos = True rst_epilog = """ .. _sherpa-ncnn: https://github.com/k2-fsa/sherpa-ncnn +.. _icefall: https://github.com/k2-fsa/icefall .. _git-lfs: https://git-lfs.com/ .. _ncnn: https://github.com/tencent/ncnn +.. _LibriSpeech: https://www.openslr.org/12 +.. _musan: http://www.openslr.org/17/ """ diff --git a/docs/source/faqs.rst b/docs/source/faqs.rst index c70ded431..72b0302d7 100644 --- a/docs/source/faqs.rst +++ b/docs/source/faqs.rst @@ -65,3 +65,43 @@ The fix is: pip uninstall setuptools pip install setuptools==58.0.4 + +ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory +-------------------------------------------------------------------------------------------- + +If you are using ``conda`` and encounter the following issue: + +.. 
code-block::

    Traceback (most recent call last):
      File "/k2-dev/yangyifan/anaconda3/envs/icefall/lib/python3.10/site-packages/k2-1.23.3.dev20230112+cuda11.6.torch1.13.1-py3.10-linux-x86_64.egg/k2/__init__.py", line 24, in 
        from _k2 import DeterminizeWeightPushingType
    ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory

    During handling of the above exception, another exception occurred:

    Traceback (most recent call last):
      File "/k2-dev/yangyifan/icefall/egs/librispeech/ASR/./pruned_transducer_stateless7_ctc_bs/decode.py", line 104, in 
        import k2
      File "/k2-dev/yangyifan/anaconda3/envs/icefall/lib/python3.10/site-packages/k2-1.23.3.dev20230112+cuda11.6.torch1.13.1-py3.10-linux-x86_64.egg/k2/__init__.py", line 30, in 
        raise ImportError(
    ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
    Note: If you're using anaconda and importing k2 on MacOS,
    you can probably fix this by setting the environment variable:
      export DYLD_LIBRARY_PATH=$CONDA_PREFIX/lib/python3.10/site-packages:$DYLD_LIBRARY_PATH

Please first try to find out where ``libpython3.10.so.1.0`` is located.

For instance,

.. code-block:: bash

  cd $CONDA_PREFIX/lib
  find . -name "libpython*"

If you are able to find it inside ``$CONDA_PREFIX/lib``, please set the
following environment variable:

.. code-block:: bash

  export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
diff --git a/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst b/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst
index aa379c3f8..ea9f350cd 100644
--- a/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst
+++ b/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst
@@ -1,16 +1,16 @@
 Distillation with HuBERT
 ========================

-This totorial shows you how to perform knowledge distillation in ``icefall``
-with the `LibriSpeech `_ dataset. 
The distillation method -used here is called "Multi Vector Quantization Knowledge Distillation" (MVQ-KD). +This tutorial shows you how to perform knowledge distillation in `icefall`_ +with the `LibriSpeech`_ dataset. The distillation method +used here is called "Multi Vector Quantization Knowledge Distillation" (MVQ-KD). Please have a look at our paper `Predicting Multi-Codebook Vector Quantization Indexes for Knowledge Distillation `_ for more details about MVQ-KD. .. note:: This tutorial is based on recipe - `pruned_transducer_stateless4 `_. + `pruned_transducer_stateless4 `_. Currently, we only implement MVQ-KD in this recipe. However, MVQ-KD is theoretically applicable to all recipes with only minor changes needed. Feel free to try out MVQ-KD in different recipes. If you encounter any problems, please open an issue here `icefall `_. @@ -18,7 +18,7 @@ for more details about MVQ-KD. .. note:: We assume you have read the page :ref:`install icefall` and have setup - the environment for ``icefall``. + the environment for `icefall`_. .. HINT:: @@ -27,13 +27,13 @@ for more details about MVQ-KD. Data preparation ---------------- -We first prepare necessary training data for ``LibriSpeech``. -This is the same as in `Pruned_transducer_statelessX <./pruned_transducer_stateless.rst>`_. +We first prepare necessary training data for `LibriSpeech`_. +This is the same as in :ref:`non_streaming_librispeech_pruned_transducer_stateless`. .. hint:: The data preparation is the same as other recipes on LibriSpeech dataset, - if you have finished this step, you can skip to ``Codebook index preparation`` directly. + if you have finished this step, you can skip to :ref:`codebook_index_preparation` directly. .. code-block:: bash @@ -61,8 +61,8 @@ For example, .. 
HINT::

-   If you have pre-downloaded the `LibriSpeech `_
-   dataset and the `musan `_ dataset, say,
+   If you have pre-downloaded the `LibriSpeech`_
+   dataset and the `musan`_ dataset, say,
    they are saved in ``/tmp/LibriSpeech`` and ``/tmp/musan``, you can modify
    the ``dl_dir`` variable in ``./prepare.sh`` to point to ``/tmp`` so that
    ``./prepare.sh`` won't re-download them.
@@ -84,24 +84,27 @@ We provide the following YouTube video showing how to run ``./prepare.sh``.

 .. youtube:: ofEIoJL-mGM

+.. _codebook_index_preparation:
+
 Codebook index preparation
 --------------------------

 Here, we prepare necessary data for MVQ-KD.
 This requires the generation of codebook indexes (please read our `paper
 `_.
-if you are interested in details). In this tutorial, we use the pre-computed
-codebook indexes for convenience. The only thing you need to do is to
-run ``./distillation_with_hubert.sh``.
+if you are interested in details). In this tutorial, we use the pre-computed 
+codebook indexes for convenience. The only thing you need to do is to 
+run `./distillation_with_hubert.sh `_.

 .. note::

-   There are 5 stages in total, the first and second stage will be automatically skipped
-   when choosing to downloaded codebook indexes prepared by `icefall`_.
-   Of course, you can extract and compute the codebook indexes by yourself. This
-   will require you downloading a HuBERT-XL model and it can take a while for
-   the extraction of codebook indexes.
-
-As usual, you can control the stages you want to run by specifying the following
+   There are 5 stages in total, the first and second stage will be automatically skipped
+   when choosing to download codebook indexes prepared by `icefall`_.
+   Of course, you can extract and compute the codebook indexes by yourself. This
+   will require you to download a HuBERT-XL model and it can take a while for
+   the extraction of codebook indexes. 
+ + +As usual, you can control the stages you want to run by specifying the following two options: - ``--stage`` @@ -115,7 +118,7 @@ For example, $ ./distillation_with_hubert.sh --stage 0 --stop-stage 0 # run only stage 0 $ ./distillation_with_hubert.sh --stage 2 --stop-stage 4 # run from stage 2 to stage 5 -Here are a few options in ``./distillation_with_hubert.sh`` +Here are a few options in `./distillation_with_hubert.sh `_ you need to know before you proceed. - ``--full_libri`` If True, use full 960h data. Otherwise only ``train-clean-100`` will be used @@ -126,14 +129,14 @@ Since we are using the pre-computed codebook indexes, we set ``use_extracted_codebook=True``. If you want to do full `LibriSpeech`_ experiments, please set ``full_libri=True``. -The following command downloads the pre-computed codebook indexes -and prepares MVQ-augmented training manifests. +The following command downloads the pre-computed codebook indexes +and prepares MVQ-augmented training manifests. .. code-block:: bash $ ./distillation_with_hubert.sh --stage 2 --stop-stage 2 # run only stage 2 -Please see the +Please see the following screenshot for the output of an example execution. .. figure:: ./images/distillation_codebook.png @@ -146,12 +149,12 @@ following screenshot for the output of an example execution. .. hint:: The codebook indexes we prepared for you in this tutorial - are extracted from the 36-th layer of a fine-tuned HuBERT-XL model + are extracted from the 36-th layer of a fine-tuned HuBERT-XL model with 8 codebooks. If you want to try other configurations, please - set ``use_extracted_codebook=False`` and set ``embedding_layer`` and + set ``use_extracted_codebook=False`` and set ``embedding_layer`` and ``num_codebooks`` by yourself. -Now, you should see the following files under the direcory ``./data/vq_fbank_layer36_cb8``. +Now, you should see the following files under the directory ``./data/vq_fbank_layer36_cb8``. .. 
figure:: ./images/distillation_directory.png :width: 800 @@ -165,7 +168,7 @@ Whola! You are ready to perform knowledge distillation training now! Training -------- -To perform training, please run stage 3 by executing the following command. +To perform training, please run stage 3 by executing the following command. .. code-block:: bash @@ -176,7 +179,7 @@ Here is the code snippet for training: .. code-block:: bash WORLD_SIZE=$(echo ${CUDA_VISIBLE_DEVICES} | awk '{n=split($1, _, ","); print n}') - + ./pruned_transducer_stateless6/train.py \ --manifest-dir ./data/vq_fbank_layer36_cb8 \ --master-port 12359 \ @@ -191,6 +194,7 @@ Here is the code snippet for training: There are a few training arguments in the following training commands that should be paid attention to. + - ``--enable-distillation`` If True, knowledge distillation training is enabled. - ``--codebook-loss-scale`` The scale of the knowledge distillation loss. - ``--manifest-dir`` The path to the MVQ-augmented manifest. @@ -204,7 +208,7 @@ the following command. .. code-block:: bash - export CUDA_VISIBLE_DEVICES=0 + export CUDA_VISIBLE_DEVICES=0 ./pruned_transducer_stateless6/train.py \ --decoding-method "modified_beam_search" \ --epoch 30 \ @@ -217,4 +221,3 @@ You should get similar results as `here `_. - diff --git a/docs/source/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst b/docs/source/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst index 86d43c8fe..42fd3df77 100644 --- a/docs/source/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst +++ b/docs/source/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst @@ -1,3 +1,5 @@ +.. _non_streaming_librispeech_pruned_transducer_stateless: + Pruned transducer statelessX ============================