Update prepare.sh (#1768)

This commit is contained in:
zr_jin 2024-10-09 00:50:12 -07:00 committed by GitHub
parent 5c04c31292
commit d9844d847f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -339,7 +339,7 @@ if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then
# 2. chmod +x ./jq
# 3. cp jq /usr/bin
gunzip -c ${file} \
- | jq '.text' | sed 's/"//g' > $lang_dir/transcript_words.txt
+ | jq '.supervisions[].text' | sed 's/"//g' > $lang_dir/transcript_words.txt
# Ensure space only appears once
sed -i 's/\t/ /g' $lang_dir/transcript_words.txt