update display_manifest_statistics.py

This commit is contained in:
AmirHussein96 2022-07-05 17:53:32 +03:00
parent b08f4424cc
commit 65e1c9b847

View File

@ -29,13 +29,9 @@ from lhotse import load_manifest
def main(): def main():
path = "./data/fbank/cuts_train-clean-100.json.gz" # path = "./data/fbank/cuts_train.jsonl.gz"
path = "./data/fbank/cuts_train-clean-360.json.gz" path = "./data/fbank/cuts_dev.jsonl.gz"
path = "./data/fbank/cuts_train-other-500.json.gz" # path = "./data/fbank/cuts_test.jsonl.gz"
path = "./data/fbank/cuts_dev-clean.json.gz"
path = "./data/fbank/cuts_dev-other.json.gz"
path = "./data/fbank/cuts_test-clean.json.gz"
path = "./data/fbank/cuts_test-other.json.gz"
cuts = load_manifest(path) cuts = load_manifest(path)
cuts.describe() cuts.describe()
@ -45,171 +41,57 @@ if __name__ == "__main__":
main() main()
""" """
## train-clean-100 # train
Cuts count: 85617
Total duration (hours): 303.8 Cuts count: 1125309
Speech duration (hours): 303.8 (100.0%) Total duration (hours): 3403.9
Speech duration (hours): 3403.9 (100.0%)
*** ***
Duration statistics (seconds): Duration statistics (seconds):
mean 12.8 mean 10.9
std 3.8 std 10.1
min 1.3 min 0.2
0.1% 1.9 25% 5.2
0.5% 2.2 50% 7.8
1% 2.5 75% 12.7
5% 4.2 99% 52.0
10% 6.4 99.5% 65.1
25% 11.4 99.9% 99.5
50% 13.8 max 228.9
75% 15.3
90% 16.7
95% 17.3
99% 18.1
99.5% 18.4
99.9% 18.8
max 27.2
## train-clean-360
Cuts count: 312042
Total duration (hours): 1098.2
Speech duration (hours): 1098.2 (100.0%)
***
Duration statistics (seconds):
mean 12.7
std 3.8
min 1.0
0.1% 1.8
0.5% 2.2
1% 2.5
5% 4.2
10% 6.2
25% 11.2
50% 13.7
75% 15.3
90% 16.6
95% 17.3
99% 18.1
99.5% 18.4
99.9% 18.8
max 33.0
## train-other 500 # test
Cuts count: 446064 Cuts count: 5365
Total duration (hours): 1500.6 Total duration (hours): 9.6
Speech duration (hours): 1500.6 (100.0%) Speech duration (hours): 9.6 (100.0%)
***
Duration statistics (seconds):
mean 12.1
std 4.2
min 0.8
0.1% 1.7
0.5% 2.1
1% 2.3
5% 3.5
10% 5.0
25% 9.8
50% 13.4
75% 15.1
90% 16.5
95% 17.2
99% 18.1
99.5% 18.4
99.9% 18.9
max 31.0
## dev-clean
Cuts count: 2703
Total duration (hours): 5.4
Speech duration (hours): 5.4 (100.0%)
***
Duration statistics (seconds):
mean 7.2
std 4.7
min 1.4
0.1% 1.6
0.5% 1.8
1% 1.9
5% 2.4
10% 2.7
25% 3.8
50% 5.9
75% 9.3
90% 13.3
95% 16.4
99% 23.8
99.5% 28.5
99.9% 32.3
max 32.6
## dev-other
Cuts count: 2864
Total duration (hours): 5.1
Speech duration (hours): 5.1 (100.0%)
*** ***
Duration statistics (seconds): Duration statistics (seconds):
mean 6.4 mean 6.4
std 4.3 std 1.5
min 1.1 min 1.6
0.1% 1.3 25% 5.3
0.5% 1.7 50% 6.5
1% 1.8 75% 7.6
5% 2.2 99% 9.5
10% 2.6 99.5% 9.7
25% 3.5 99.9% 10.3
50% 5.3 max 12.4
75% 7.9
90% 12.0
95% 15.0
99% 22.2
99.5% 27.1
99.9% 32.4
max 35.2
## test-clean # dev
Cuts count: 2620 Cuts count: 5002
Total duration (hours): 5.4 Total duration (hours): 8.5
Speech duration (hours): 5.4 (100.0%) Speech duration (hours): 8.5 (100.0%)
*** ***
Duration statistics (seconds): Duration statistics (seconds):
mean 7.4 mean 6.1
std 5.2 std 1.7
min 1.3 min 1.5
0.1% 1.6 25% 4.8
0.5% 1.8 50% 6.2
1% 2.0 75% 7.4
5% 2.3 99% 9.5
10% 2.7 99.5% 9.7
25% 3.7 99.9% 10.1
50% 5.8 max 20.3
75% 9.6
90% 14.6
95% 17.8
99% 25.5
99.5% 28.4
99.9% 32.8
max 35.0
## test-other
Cuts count: 2939
Total duration (hours): 5.3
Speech duration (hours): 5.3 (100.0%)
***
Duration statistics (seconds):
mean 6.5
std 4.4
min 1.2
0.1% 1.5
0.5% 1.8
1% 1.9
5% 2.3
10% 2.6
25% 3.4
50% 5.2
75% 8.2
90% 12.6
95% 15.8
99% 21.4
99.5% 23.8
99.9% 33.5
max 34.5
""" """