mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
fix mmsu
This commit is contained in:
parent
cbf3af31fd
commit
e65725810c
@ -242,8 +242,7 @@ fi
|
|||||||
|
|
||||||
if [ $stage -le 14 ] && [ $stop_stage -ge 14 ]; then
|
if [ $stage -le 14 ] && [ $stop_stage -ge 14 ]; then
|
||||||
log "stage 14: Client"
|
log "stage 14: Client"
|
||||||
# datasets=(alpacaeval wildvoice mmsu advbench bbh ifeval commoneval obqa sd-qa)
|
datasets=(alpacaeval wildvoice mmsu advbench bbh ifeval commoneval obqa sd-qa)
|
||||||
datasets=(alpacaeval_full wildvoice mmsu advbench bbh ifeval sd-qa)
|
|
||||||
for dataset in ${datasets[@]}; do
|
for dataset in ${datasets[@]}; do
|
||||||
# sd-qa should use usa split
|
# sd-qa should use usa split
|
||||||
if [ $dataset == "sd-qa" ]; then
|
if [ $dataset == "sd-qa" ]; then
|
||||||
|
@ -4,7 +4,7 @@ import json
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from datasets import load_dataset
|
from datasets import concatenate_datasets, load_dataset
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
@ -31,7 +31,7 @@ def get_args():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--split-name",
|
"--split-name",
|
||||||
type=str,
|
type=str,
|
||||||
default="test", # Adjust as needed
|
default=None, # Adjust as needed
|
||||||
help="Dataset split name",
|
help="Dataset split name",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -51,13 +51,21 @@ def main():
|
|||||||
server_decode_url = f"{args.server_url}/decode"
|
server_decode_url = f"{args.server_url}/decode"
|
||||||
|
|
||||||
print("Loading dataset...")
|
print("Loading dataset...")
|
||||||
|
if args.subset_name != "mmsu":
|
||||||
dataset = load_dataset(
|
dataset = load_dataset(
|
||||||
args.dataset_name,
|
args.dataset_name,
|
||||||
args.subset_name,
|
args.subset_name,
|
||||||
split=args.split_name,
|
split=args.split_name,
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
# load all splits and concatenate them
|
||||||
|
dataset = load_dataset(
|
||||||
|
args.dataset_name,
|
||||||
|
args.subset_name,
|
||||||
|
trust_remote_code=True,
|
||||||
|
)
|
||||||
|
dataset = concatenate_datasets([dataset[subset] for subset in dataset])
|
||||||
|
|
||||||
print(f"Dataset loaded with {len(dataset)} samples.")
|
print(f"Dataset loaded with {len(dataset)} samples.")
|
||||||
print(f"Sending requests to {server_decode_url}...")
|
print(f"Sending requests to {server_decode_url}...")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user