mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
* update manifest stats * update transcript configs * lang_char and compute_fbanks * save cuts in fbank_dir * add core codes * update decode.py * Create local/utils * tidy up * parse raw in prepare_lang_char.py * update manifest stats * update transcript configs * lang_char and compute_fbanks * save cuts in fbank_dir * add core codes * update decode.py * Create local/utils * tidy up * parse raw in prepare_lang_char.py * working train * Add compare_cer_transcript.py * fix tokenizer decode, allow d2f only * comment cleanup * add export files and READMEs * reword average column * fix comments * Update new results
80 lines
2.8 KiB
INI
80 lines
2.8 KiB
INI
[CONSTANTS]
|
||
; # Name of this mode
|
||
MODE = disfluent
|
||
|
||
[DECISIONS]
|
||
; # フィラー、感情表出系感動詞 / Fillers and emotive interjections
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '(F ぎょっ)'
|
||
F = 0
|
||
; # 言い直し、いいよどみなどによる語断片 / Word fragments caused by restarts, hesitations, etc.
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '(D だ)(D だいが) 大学の学部の会議'
|
||
D = 0
|
||
; # 助詞、助動詞、接辞の言い直し / Restated particles, auxiliary verbs, and affixes
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '西洋 (D2 的)(F えー)(D ふ) 風というか'
|
||
D2 = 0
|
||
; # 聞き取りや語彙の判断に自信がない場合 / Transcriber was unsure of what was heard or of the word choice
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '(? 字数) の'
|
||
; # If the tag is empty (no candidate text), an empty string is returned regardless of this setting
|
||
; # Example: '(?) で'
|
||
? = 0
|
||
; # タグ?で、値は複数の候補が想定される場合 / Tag ? when several candidate values are given
|
||
; # 0 to use the main (first) guess, which has matching morph info; 1 to use the second guess
|
||
; # Example: '(? 次数, 実数)', '(? これ,ここで)+(? 説明+し+た+方+が+いい+か+な)'
|
||
?, = 0
|
||
; # 音や言葉に関するメタ的な引用 / Meta-linguistic quotation of sounds or words
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '助詞の (M は) は (M は) と書くが発音は (M わ)'
|
||
M = 0
|
||
; # 外国語や古語、方言など / Foreign words, archaic words, dialect, etc.
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '(O ザッツファイン)'
|
||
O = 0
|
||
; # 講演者の名前、差別語、誹謗中傷など / Speaker names, discriminatory terms, slander, etc.
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '国語研の (R ××) です'
|
||
R = 0
|
||
; # 非朗読対象発話(朗読における言い間違い等) / Utterances that are not part of the read-aloud text (e.g. misreadings while reading)
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '(X 実際は) 実際には'
|
||
X = 0
|
||
; # アルファベットや算用数字、記号の表記 / Items written with alphabet letters, Arabic numerals, or symbols
|
||
; # 0 to use Japanese form, 1 to use alphabet form
|
||
; # Example: '(A シーディーアール;CD-R)'
|
||
A = 1
|
||
; # タグAで、単語は算用数字の場合 / Tag A when the word is an Arabic numeral
|
||
; # 0 to use Japanese form, 1 to use Arabic numerals
|
||
; # Example: '(A 二千;2000)'
|
||
A_num = 0
|
||
; # 何らかの原因で漢字表記できなくなった場合 / Word could not be written in kanji for some reason
|
||
; # 0 to use broken form, 1 to use orthodox form
|
||
; # Example: '(K たち (F えー) ばな;橘)'
|
||
K = 1
|
||
; # 転訛、発音の怠けなど、一時的な発音エラー / Temporary pronunciation errors such as corrupted or slurred pronunciation
|
||
; # 0 to use wrong form, 1 to use orthodox form
|
||
; # Example: '(W ギーツ;ギジュツ)'
|
||
W = 1
|
||
; # 語の読みに関する知識レベルのいい間違い / Knowledge-level errors in how a word is read
|
||
; # 0 to use wrong form, 1 to use orthodox form
|
||
; # Example: '(B シブタイ;ジュータイ)'
|
||
B = 0
|
||
; # 笑いながら発話 / Speech produced while laughing
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '(笑 ナニガ)', '(笑 (F エー)+ソー+イッ+タ+ヨー+ナ)'
|
||
笑 = 0
|
||
; # 泣きながら発話 / Speech produced while crying
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '(泣 ドンナニ)'
|
||
泣 = 0
|
||
; # 咳をしながら発話 / Speech produced while coughing
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: 'シャ(咳 リン) ノ'
|
||
咳 = 0
|
||
; # ささやき声や独り言などの小さな声 / Quiet speech such as whispering or talking to oneself
|
||
; # 0 to remain, 1 to delete
|
||
; # Example: '(L アレコレナンダッケ)', '(L (W コデ;(? コレ,ココデ))+(? セツメー+シ+タ+ホー+ガ+イー+カ+ナ))'
|
||
L = 0
|