mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
53 lines
1.9 KiB
Prolog
Executable File
53 lines
1.9 KiB
Prolog
Executable File
#!/usr/bin/env perl
|
|
|
|
# Modified from swbd_map_words.pl in Kaldi s5 recipe to make pattern
|
|
# matches case-insensitive --Arnab (Jan 2013)
|
|
|
|
if ($ARGV[0] eq "-f") {
|
|
shift @ARGV;
|
|
$field_spec = shift @ARGV;
|
|
if ($field_spec =~ m/^\d+$/) {
|
|
$field_begin = $field_spec - 1; $field_end = $field_spec - 1;
|
|
}
|
|
if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
|
|
if ($1 ne "") {
|
|
$field_begin = $1 - 1; # Change to zero-based indexing.
|
|
}
|
|
if ($2 ne "") {
|
|
$field_end = $2 - 1; # Change to zero-based indexing.
|
|
}
|
|
}
|
|
if (!defined $field_begin && !defined $field_end) {
|
|
die "Bad argument to -f option: $field_spec";
|
|
}
|
|
}
|
|
|
|
|
|
while (<>) {
|
|
@A = split(" ", $_);
|
|
for ($n = 0; $n < @A; $n++) {
|
|
$a = $A[$n];
|
|
if ( (!defined $field_begin || $n >= $field_begin)
|
|
&& (!defined $field_end || $n <= $field_end)) {
|
|
# e.g. [LAUGHTER-STORY] -> STORY;
|
|
$a =~ s:(|\-)^\[LAUGHTER-(.+)\](|\-)$:$1$2$3:i;
|
|
# $1 and $3 relate to preserving trailing "-"
|
|
$a =~ s:^\[(.+)/.+\](|\-)$:$1$2:; # e.g. [IT'N/ISN'T] -> IT'N ... note,
|
|
# 1st part may include partial-word stuff, which we process further below,
|
|
# e.g. [LEM[GUINI]-/LINGUINI]
|
|
# the (|\_) at the end is to accept and preserve trailing -'s.
|
|
$a =~ s:^(|\-)\[[^][]+\](.+)$:-$2:; # e.g. -[AN]Y , note \047 is quote;
|
|
# let the leading - be optional on input, as sometimes omitted.
|
|
$a =~ s:^(.+)\[[^][]+\](|\-)$:$1-:; # e.g. AB[SOLUTE]- -> AB-;
|
|
# let the trailing - be optional on input, as sometimes omitted.
|
|
$a =~ s:([^][]+)\[.+\]$:$1:; # e.g. EX[SPECIALLY]-/ESPECIALLY] -> EX-
|
|
# which is a mistake in the input.
|
|
$a =~ s:^\{(.+)\}$:$1:; # e.g. {YUPPIEDOM} -> YUPPIEDOM
|
|
$a =~ s:[A-Z]\[([^][])+\][A-Z]:$1-$3:i; # e.g. AMMU[N]IT- -> AMMU-IT-
|
|
$a =~ s:_\d$::; # e.g. THEM_1 -> THEM
|
|
}
|
|
$A[$n] = $a;
|
|
}
|
|
print join(" ", @A) . "\n";
|
|
}
|