Use GPU for averaging checkpoints if possible. (#84)

Fangjun Kuang 2021-10-26 17:10:04 +08:00 committed by GitHub
parent 712ead8207
commit 8cb7f712e4
3 changed files with 11 additions and 5 deletions


@@ -640,7 +640,8 @@ def main():
             if start >= 0:
                 filenames.append(f"{params.exp_dir}/epoch-{i}.pt")
         logging.info(f"averaging {filenames}")
-        model.load_state_dict(average_checkpoints(filenames))
+        model.to(device)
+        model.load_state_dict(average_checkpoints(filenames, device=device))
 
     if params.export:
         logging.info(f"Export averaged model to {params.exp_dir}/pretrained.pt")


@@ -457,7 +457,8 @@ def main():
             if start >= 0:
                 filenames.append(f"{params.exp_dir}/epoch-{i}.pt")
         logging.info(f"averaging {filenames}")
-        model.load_state_dict(average_checkpoints(filenames))
+        model.to(device)
+        model.load_state_dict(average_checkpoints(filenames, device=device))
 
     if params.export:
         logging.info(f"Export averaged model to {params.exp_dir}/pretrained.pt")


@@ -120,22 +120,26 @@ def load_checkpoint(
     return checkpoint
 
 
-def average_checkpoints(filenames: List[Path]) -> dict:
+def average_checkpoints(
+    filenames: List[Path], device: torch.device = torch.device("cpu")
+) -> dict:
     """Average a list of checkpoints.
 
     Args:
       filenames:
         Filenames of the checkpoints to be averaged. We assume all
        checkpoints are saved by :func:`save_checkpoint`.
+      device:
+        Move checkpoints to this device before averaging.
     Returns:
       Return a dict (i.e., state_dict) which is the average of all
       model state dicts contained in the checkpoints.
     """
     n = len(filenames)
 
-    avg = torch.load(filenames[0], map_location="cpu")["model"]
+    avg = torch.load(filenames[0], map_location=device)["model"]
     for i in range(1, n):
-        state_dict = torch.load(filenames[i], map_location="cpu")["model"]
+        state_dict = torch.load(filenames[i], map_location=device)["model"]
         for k in avg:
             avg[k] += state_dict[k]
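
For context, a minimal sketch of how the extended average_checkpoints is meant to be called from a decoding script after this change. Only the device=device argument and the model.to(device) call come from the hunks above; the experiment directory, epoch values, and the CUDA-availability check are illustrative assumptions, not part of this commit.

import torch

from icefall.checkpoint import average_checkpoints  # module shown in the last hunk

# Illustrative values; in the decode scripts they come from the parsed params.
exp_dir = "conformer_ctc/exp"  # hypothetical experiment directory
epoch, avg = 34, 20
start = epoch - avg + 1

# Prefer the GPU when one is available, so the checkpoints are loaded and
# summed on the GPU instead of the CPU (this check is assumed typical usage).
device = torch.device("cuda", 0) if torch.cuda.is_available() else torch.device("cpu")

# Checkpoints saved by save_checkpoint(), as in the decode.py hunks above.
filenames = [f"{exp_dir}/epoch-{i}.pt" for i in range(start, epoch + 1) if start >= 0]

model.to(device)  # `model` is the network already constructed in decode.py
model.load_state_dict(average_checkpoints(filenames, device=device))

Since the averaged tensors already live on device, moving the model there before load_state_dict keeps the copy on-device instead of routing the averaged weights back through the CPU.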