diff --git a/docs/source/for-dummies/model-export.rst b/docs/source/for-dummies/model-export.rst
index 352a0dc90..a3dd9088f 100644
--- a/docs/source/for-dummies/model-export.rst
+++ b/docs/source/for-dummies/model-export.rst
@@ -41,7 +41,7 @@ To give you an idea of what ``tdnn/exp/pretrained.pt`` contains, we can use the
 .. code-block:: python3
 
     >>> import torch
-    >>> m = torch.load("tdnn/exp/pretrained.pt")
+    >>> m = torch.load("tdnn/exp/pretrained.pt", weights_only=False)
     >>> list(m.keys())
     ['model']
     >>> list(m["model"].keys())
diff --git a/egs/aidatatang_200zh/ASR/local/prepare_lang.py b/egs/aidatatang_200zh/ASR/local/prepare_lang.py
index c8cf9b881..aa23c4cb3 100755
--- a/egs/aidatatang_200zh/ASR/local/prepare_lang.py
+++ b/egs/aidatatang_200zh/ASR/local/prepare_lang.py
@@ -28,7 +28,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/pretrained.py b/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/pretrained.py
index 17729e02e..d0dc36eff 100644
--- a/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/pretrained.py
+++ b/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/pretrained.py
@@ -224,7 +224,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/aishell/ASR/conformer_ctc/decode.py b/egs/aishell/ASR/conformer_ctc/decode.py
index 2cb476e20..90881ee40 100755
--- a/egs/aishell/ASR/conformer_ctc/decode.py
+++ b/egs/aishell/ASR/conformer_ctc/decode.py
@@ -503,7 +503,7 @@ def main():
     else:
         H = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
diff --git a/egs/aishell/ASR/conformer_ctc/pretrained.py b/egs/aishell/ASR/conformer_ctc/pretrained.py
index af1171a6f..4caff4e16 100755
--- a/egs/aishell/ASR/conformer_ctc/pretrained.py
+++ b/egs/aishell/ASR/conformer_ctc/pretrained.py
@@ -249,7 +249,7 @@ def main():
         use_feat_batchnorm=params.use_feat_batchnorm,
     )
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
@@ -315,7 +315,7 @@ def main():
         hyps = [[token_sym_table[i] for i in ids] for ids in token_ids]
     elif params.method in ["1best", "attention-decoder"]:
         logging.info(f"Loading HLG from {params.HLG}")
-        HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
+        HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu", weights_only=False))
         HLG = HLG.to(device)
         if not hasattr(HLG, "lm_scores"):
             # For whole-lattice-rescoring and attention-decoder
diff --git a/egs/aishell/ASR/conformer_mmi/decode.py b/egs/aishell/ASR/conformer_mmi/decode.py
index 8a2daa93e..c88aea41a 100755
--- a/egs/aishell/ASR/conformer_mmi/decode.py
+++ b/egs/aishell/ASR/conformer_mmi/decode.py
@@ -516,7 +516,7 @@ def main():
     else:
         H = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
diff --git a/egs/aishell/ASR/local/prepare_lang.py b/egs/aishell/ASR/local/prepare_lang.py
index c8cf9b881..aa23c4cb3 100755
--- a/egs/aishell/ASR/local/prepare_lang.py
+++ b/egs/aishell/ASR/local/prepare_lang.py
@@ -28,7 +28,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py
index c4aa98358..2bcf34de8 100755
--- a/egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py
+++ b/egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py
@@ -227,7 +227,7 @@ def main():
     logging.info("About to create model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py
index 69fe3a40b..bf46a099b 100755
--- a/egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py
+++ b/egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py
@@ -228,7 +228,7 @@ def main():
     logging.info("About to create model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py
index 46f542641..40e0565bb 100755
--- a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py
+++ b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py
@@ -773,7 +773,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py
index 12004315b..1972d05c8 100755
--- a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py
+++ b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py
@@ -237,7 +237,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/aishell/ASR/tdnn_lstm_ctc/decode.py b/egs/aishell/ASR/tdnn_lstm_ctc/decode.py
index 05e52f560..a6dfd8a75 100755
--- a/egs/aishell/ASR/tdnn_lstm_ctc/decode.py
+++ b/egs/aishell/ASR/tdnn_lstm_ctc/decode.py
@@ -337,7 +337,7 @@ def main():
 
     logging.info(f"device: {device}")
 
-    HLG = k2.Fsa.from_dict(torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu"))
+    HLG = k2.Fsa.from_dict(torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu", weights_only=False))
     HLG = HLG.to(device)
     assert HLG.requires_grad is False
 
diff --git a/egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py b/egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py
index 9754b4939..6cfe2de89 100644
--- a/egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py
+++ b/egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py
@@ -139,13 +139,13 @@ def main():
         subsampling_factor=params.subsampling_factor,
     )
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"])
     model.to(device)
     model.eval()
 
     logging.info(f"Loading HLG from {params.HLG}")
-    HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
+    HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu", weights_only=False))
     HLG = HLG.to(device)
     if not hasattr(HLG, "lm_scores"):
         # For whole-lattice-rescoring and attention-decoder
diff --git a/egs/aishell/ASR/transducer_stateless/pretrained.py b/egs/aishell/ASR/transducer_stateless/pretrained.py
index 540e7b61b..b52139d88 100755
--- a/egs/aishell/ASR/transducer_stateless/pretrained.py
+++ b/egs/aishell/ASR/transducer_stateless/pretrained.py
@@ -245,7 +245,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py b/egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py
index 4a4e9237c..56353712a 100755
--- a/egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py
+++ b/egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py
@@ -225,7 +225,7 @@ def main():
     logging.info("About to create model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/aishell/ASR/transducer_stateless_modified/pretrained.py b/egs/aishell/ASR/transducer_stateless_modified/pretrained.py
index 66a91709e..28e8fbf28 100755
--- a/egs/aishell/ASR/transducer_stateless_modified/pretrained.py
+++ b/egs/aishell/ASR/transducer_stateless_modified/pretrained.py
@@ -225,7 +225,7 @@ def main():
     logging.info("About to create model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"])
     model.to(device)
     model.eval()
diff --git a/egs/aishell/ASR/whisper/decode.py b/egs/aishell/ASR/whisper/decode.py
index 5350cb2b0..75d3c5a65 100755
--- a/egs/aishell/ASR/whisper/decode.py
+++ b/egs/aishell/ASR/whisper/decode.py
@@ -89,10 +89,10 @@ def average_checkpoints(
     """
     n = len(filenames)
 
-    if "model" in torch.load(filenames[0], map_location=device):
-        avg = torch.load(filenames[0], map_location=device)["model"]
+    if "model" in torch.load(filenames[0], map_location=device, weights_only=False):
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)["model"]
     else:
-        avg = torch.load(filenames[0], map_location=device)
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)
 
     # Identify shared parameters. Two parameters are said to be shared
     # if they have the same data_ptr
@@ -107,10 +107,10 @@ def average_checkpoints(
     uniqued_names = list(uniqued.values())
 
     for i in range(1, n):
-        if "model" in torch.load(filenames[i], map_location=device):
-            state_dict = torch.load(filenames[i], map_location=device)["model"]
+        if "model" in torch.load(filenames[i], map_location=device, weights_only=False):
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)["model"]
         else:
-            state_dict = torch.load(filenames[i], map_location=device)
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)
         for k in uniqued_names:
             avg[k] += state_dict[k]
 
@@ -440,7 +440,7 @@ def main():
             start = params.epoch - params.avg
             assert start >= 1, start
             checkpoint = torch.load(
-                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu"
+                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu", weights_only=False
             )
             if "model" not in checkpoint:
                 # deepspeed converted checkpoint only contains model state_dict
@@ -469,7 +469,7 @@ def main():
             torch.save(model.state_dict(), filename)
         else:
             checkpoint = torch.load(
-                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu"
+                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu", weights_only=False
             )
             if "model" not in checkpoint:
                 model.load_state_dict(checkpoint, strict=True)
diff --git a/egs/aishell/ASR/zipformer/decode.py b/egs/aishell/ASR/zipformer/decode.py
index 538189e52..85b75c988 100755
--- a/egs/aishell/ASR/zipformer/decode.py
+++ b/egs/aishell/ASR/zipformer/decode.py
@@ -761,7 +761,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/aishell/ASR/zipformer/decode_bbpe.py b/egs/aishell/ASR/zipformer/decode_bbpe.py
index 1ec10b059..79376c638 100755
--- a/egs/aishell/ASR/zipformer/decode_bbpe.py
+++ b/egs/aishell/ASR/zipformer/decode_bbpe.py
@@ -783,7 +783,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/aishell/ASR/zipformer/pretrained_bbpe.py b/egs/aishell/ASR/zipformer/pretrained_bbpe.py
index 387bef98a..f2cddb9b1 100755
--- a/egs/aishell/ASR/zipformer/pretrained_bbpe.py
+++ b/egs/aishell/ASR/zipformer/pretrained_bbpe.py
@@ -298,7 +298,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/aishell2/ASR/pruned_transducer_stateless5/decode.py b/egs/aishell2/ASR/pruned_transducer_stateless5/decode.py
index 9e44b4e34..93f75b36f 100755
--- a/egs/aishell2/ASR/pruned_transducer_stateless5/decode.py
+++ b/egs/aishell2/ASR/pruned_transducer_stateless5/decode.py
@@ -728,7 +728,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py b/egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py
index f04632388..1002a6645 100755
--- a/egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py
+++ b/egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py
@@ -226,7 +226,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/aishell4/ASR/local/prepare_lang.py b/egs/aishell4/ASR/local/prepare_lang.py
index c8cf9b881..aa23c4cb3 100755
--- a/egs/aishell4/ASR/local/prepare_lang.py
+++ b/egs/aishell4/ASR/local/prepare_lang.py
@@ -28,7 +28,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py b/egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py
index e8b7f71b7..f85d0552f 100755
--- a/egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py
+++ b/egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py
@@ -238,7 +238,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/alimeeting/ASR/local/prepare_lang.py b/egs/alimeeting/ASR/local/prepare_lang.py
index c8cf9b881..aa23c4cb3 100755
--- a/egs/alimeeting/ASR/local/prepare_lang.py
+++ b/egs/alimeeting/ASR/local/prepare_lang.py
@@ -28,7 +28,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py b/egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py
index a738bb3fb..7566f9a5f 100644
--- a/egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py
+++ b/egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py
@@ -224,7 +224,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/ami/ASR/pruned_transducer_stateless7/decode.py b/egs/ami/ASR/pruned_transducer_stateless7/decode.py
index 9999894d1..712855733 100755
--- a/egs/ami/ASR/pruned_transducer_stateless7/decode.py
+++ b/egs/ami/ASR/pruned_transducer_stateless7/decode.py
@@ -672,7 +672,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/ami/SURT/dprnn_zipformer/train.py b/egs/ami/SURT/dprnn_zipformer/train.py
index 3572acd04..d5025b477 100755
--- a/egs/ami/SURT/dprnn_zipformer/train.py
+++ b/egs/ami/SURT/dprnn_zipformer/train.py
@@ -1263,7 +1263,7 @@ def run(rank, world_size, args):
         logging.info(
             f"Initializing model with checkpoint from {params.model_init_ckpt}"
         )
-        init_ckpt = torch.load(params.model_init_ckpt, map_location=device)
+        init_ckpt = torch.load(params.model_init_ckpt, map_location=device, weights_only=False)
         model.load_state_dict(init_ckpt["model"], strict=False)
 
     if world_size > 1:
diff --git a/egs/ami/SURT/dprnn_zipformer/train_adapt.py b/egs/ami/SURT/dprnn_zipformer/train_adapt.py
index 313a5c46a..35b3ced31 100755
--- a/egs/ami/SURT/dprnn_zipformer/train_adapt.py
+++ b/egs/ami/SURT/dprnn_zipformer/train_adapt.py
@@ -1254,7 +1254,7 @@ def run(rank, world_size, args):
         logging.info(
             f"Initializing model with checkpoint from {params.model_init_ckpt}"
         )
-        init_ckpt = torch.load(params.model_init_ckpt, map_location=device)
+        init_ckpt = torch.load(params.model_init_ckpt, map_location=device, weights_only=False)
         model.load_state_dict(init_ckpt["model"], strict=False)
 
     if world_size > 1:
diff --git a/egs/audioset/AT/zipformer/pretrained.py b/egs/audioset/AT/zipformer/pretrained.py
index bdbd799fa..8876b5889 100755
--- a/egs/audioset/AT/zipformer/pretrained.py
+++ b/egs/audioset/AT/zipformer/pretrained.py
@@ -141,7 +141,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/baker_zh/TTS/matcha/infer.py b/egs/baker_zh/TTS/matcha/infer.py
index b90c2fdbd..142d9fdfe 100755
--- a/egs/baker_zh/TTS/matcha/infer.py
+++ b/egs/baker_zh/TTS/matcha/infer.py
@@ -115,7 +115,7 @@ def load_vocoder(checkpoint_path: Path) -> nn.Module:
 
     hifigan = HiFiGAN(h).to("cpu")
     hifigan.load_state_dict(
-        torch.load(checkpoint_path, map_location="cpu")["generator"]
+        torch.load(checkpoint_path, map_location="cpu", weights_only=False)["generator"]
     )
     _ = hifigan.eval()
     hifigan.remove_weight_norm()
diff --git a/egs/commonvoice/ASR/local/compile_hlg.py b/egs/commonvoice/ASR/local/compile_hlg.py
index 6512aa68b..76b7afcab 100755
--- a/egs/commonvoice/ASR/local/compile_hlg.py
+++ b/egs/commonvoice/ASR/local/compile_hlg.py
@@ -73,11 +73,11 @@ def compile_HLG(lang_dir: str, lm: str = "G_3_gram") -> k2.Fsa:
     max_token_id = max(lexicon.tokens)
     logging.info(f"Building ctc_topo. max_token_id: {max_token_id}")
     H = k2.ctc_topo(max_token_id)
-    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
+    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))
 
     if Path(f"{lang_dir}/lm/{lm}.pt").is_file():
         logging.info(f"Loading pre-compiled {lm}")
-        d = torch.load(f"{lang_dir}/lm/{lm}.pt")
+        d = torch.load(f"{lang_dir}/lm/{lm}.pt", weights_only=False)
         G = k2.Fsa.from_dict(d)
     else:
         logging.info(f"Loading {lm}.fst.txt")
diff --git a/egs/commonvoice/ASR/local/compile_lg.py b/egs/commonvoice/ASR/local/compile_lg.py
index 76dacb5b2..2a17e91c6 100755
--- a/egs/commonvoice/ASR/local/compile_lg.py
+++ b/egs/commonvoice/ASR/local/compile_lg.py
@@ -68,11 +68,11 @@ def compile_LG(lang_dir: str, lm: str = "G_3_gram") -> k2.Fsa:
       An FSA representing LG.
     """
     lexicon = Lexicon(lang_dir)
-    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
+    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))
 
     if Path(f"{lang_dir}/lm/{lm}.pt").is_file():
         logging.info(f"Loading pre-compiled {lm}")
-        d = torch.load(f"{lang_dir}/lm/{lm}.pt")
+        d = torch.load(f"{lang_dir}/lm/{lm}.pt", weights_only=False)
         G = k2.Fsa.from_dict(d)
     else:
         logging.info(f"Loading {lm}.fst.txt")
diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7/decode.py b/egs/commonvoice/ASR/pruned_transducer_stateless7/decode.py
index 52b2fbcab..00f6616a4 100755
--- a/egs/commonvoice/ASR/pruned_transducer_stateless7/decode.py
+++ b/egs/commonvoice/ASR/pruned_transducer_stateless7/decode.py
@@ -910,7 +910,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py b/egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py
index b6e2451e8..eee563e70 100755
--- a/egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py
+++ b/egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py
@@ -247,7 +247,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/decode.py b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/decode.py
index 7ae4f1894..6dfb32728 100755
--- a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/decode.py
+++ b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/decode.py
@@ -767,7 +767,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/finetune.py b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/finetune.py
index fb812b391..1b187da1a 100755
--- a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/finetune.py
+++ b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/finetune.py
@@ -627,7 +627,7 @@ def load_model_params(
 
     """
     logging.info(f"Loading checkpoint from {ckpt}")
-    checkpoint = torch.load(ckpt, map_location="cpu")
+    checkpoint = torch.load(ckpt, map_location="cpu", weights_only=False)
 
     # if module list is empty, load the whole model from ckpt
     if not init_modules:
diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/generate_model_from_checkpoint.py b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/generate_model_from_checkpoint.py
index 3fd14aa47..1a104442f 100755
--- a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/generate_model_from_checkpoint.py
+++ b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/generate_model_from_checkpoint.py
@@ -25,7 +25,7 @@ Usage:
     --exp-dir ./pruned_transducer_stateless7/exp
 
 It will generate a file `epoch-28-avg-15-use-averaged-model.pt` in the given `exp_dir`.
-You can later load it by `torch.load("epoch-28-avg-15-use-averaged-model.pt")`.
+You can later load it by `torch.load("epoch-28-avg-15-use-averaged-model.pt", weights_only=False)`.
 
 (2) use the averaged model with checkpoint exp_dir/checkpoint-iter.pt
 ./pruned_transducer_stateless7/generate_model_from_checkpoint.py \
@@ -35,7 +35,7 @@ You can later load it by `torch.load("epoch-28-avg-15-use-averaged-model.pt")`.
     --exp-dir ./pruned_transducer_stateless7/exp
 
 It will generate a file `iter-22000-avg-5-use-averaged-model.pt` in the given `exp_dir`.
-You can later load it by `torch.load("iter-22000-avg-5-use-averaged-model.pt")`.
+You can later load it by `torch.load("iter-22000-avg-5-use-averaged-model.pt", weights_only=False)`.
 
 (3) use the original model with checkpoint exp_dir/epoch-xxx.pt
 ./pruned_transducer_stateless7/generate_model_from_checkpoint.py \
@@ -45,7 +45,7 @@ You can later load it by `torch.load("iter-22000-avg-5-use-averaged-model.pt")`.
     --exp-dir ./pruned_transducer_stateless7/exp
 
 It will generate a file `epoch-28-avg-15.pt` in the given `exp_dir`.
-You can later load it by `torch.load("epoch-28-avg-15.pt")`.
+You can later load it by `torch.load("epoch-28-avg-15.pt", weights_only=False)`.
 
 (4) use the original model with checkpoint exp_dir/checkpoint-iter.pt
 ./pruned_transducer_stateless7/generate_model_from_checkpoint.py \
@@ -55,7 +55,7 @@ You can later load it by `torch.load("epoch-28-avg-15.pt")`.
     --exp-dir ./pruned_transducer_stateless7/exp
 
 It will generate a file `iter-22000-avg-5.pt` in the given `exp_dir`.
-You can later load it by `torch.load("iter-22000-avg-5.pt")`.
+You can later load it by `torch.load("iter-22000-avg-5.pt", weights_only=False)`.
 """
 
 
diff --git a/egs/commonvoice/ASR/zipformer/decode.py b/egs/commonvoice/ASR/zipformer/decode.py
index 7fd6d0ccd..5e3cbaf92 100755
--- a/egs/commonvoice/ASR/zipformer/decode.py
+++ b/egs/commonvoice/ASR/zipformer/decode.py
@@ -987,7 +987,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/commonvoice/ASR/zipformer/decode_char.py b/egs/commonvoice/ASR/zipformer/decode_char.py
index 1f8c9c7c6..8a814122d 100755
--- a/egs/commonvoice/ASR/zipformer/decode_char.py
+++ b/egs/commonvoice/ASR/zipformer/decode_char.py
@@ -756,7 +756,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/decode.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/decode.py
index f5a1d750d..8c8e7ab83 100755
--- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/decode.py
+++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/decode.py
@@ -791,7 +791,7 @@ def main():
 
     if params.decoding_graph:
         decoding_graph = k2.Fsa.from_dict(
-            torch.load(params.decoding_graph, map_location=device)
+            torch.load(params.decoding_graph, map_location=device, weights_only=False)
         )
     elif "fast_beam_search" in params.decoding_method:
         if params.decoding_method == "fast_beam_search_nbest_LG":
@@ -800,7 +800,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py
index 66fbae378..3a7a05820 100644
--- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py
+++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py
@@ -239,7 +239,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py
index 6a249dd3f..fa4f41483 100755
--- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py
+++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py
@@ -561,7 +561,7 @@ def main():
     decoding_graph = None
     if params.decoding_graph:
         decoding_graph = k2.Fsa.from_dict(
-            torch.load(params.decoding_graph, map_location=device)
+            torch.load(params.decoding_graph, map_location=device, weights_only=False)
         )
     elif params.decoding_method == "fast_beam_search":
         decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device)
diff --git a/egs/fluent_speech_commands/SLU/local/compile_hlg.py b/egs/fluent_speech_commands/SLU/local/compile_hlg.py
index a7df8f966..803164d82 100755
--- a/egs/fluent_speech_commands/SLU/local/compile_hlg.py
+++ b/egs/fluent_speech_commands/SLU/local/compile_hlg.py
@@ -47,7 +47,7 @@ def compile_HLG(lang_dir: str) -> k2.Fsa:
     max_token_id = max(lexicon.tokens)
     logging.info(f"Building ctc_topo. max_token_id: {max_token_id}")
     H = k2.ctc_topo(max_token_id)
-    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
+    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))
 
     logging.info("Loading G.fst.txt")
     with open(lang_dir / "G.fst.txt") as f:
diff --git a/egs/fluent_speech_commands/SLU/local/prepare_lang.py b/egs/fluent_speech_commands/SLU/local/prepare_lang.py
index 2a71dcf81..72b9bf1c3 100755
--- a/egs/fluent_speech_commands/SLU/local/prepare_lang.py
+++ b/egs/fluent_speech_commands/SLU/local/prepare_lang.py
@@ -14,7 +14,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/gigaspeech/ASR/conformer_ctc/decode.py b/egs/gigaspeech/ASR/conformer_ctc/decode.py
index d7035a1f8..47f35174f 100755
--- a/egs/gigaspeech/ASR/conformer_ctc/decode.py
+++ b/egs/gigaspeech/ASR/conformer_ctc/decode.py
@@ -589,7 +589,7 @@ def main():
         H = None
         bpe_model = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
@@ -628,7 +628,7 @@ def main():
                 torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
         else:
             logging.info("Loading pre-compiled G_4_gram.pt")
-            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
+            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False)
             G = k2.Fsa.from_dict(d)
 
         if params.method in ["whole-lattice-rescoring", "attention-decoder"]:
diff --git a/egs/gigaspeech/ASR/zipformer/ctc_decode.py b/egs/gigaspeech/ASR/zipformer/ctc_decode.py
index 651f20cb6..c28abf020 100755
--- a/egs/gigaspeech/ASR/zipformer/ctc_decode.py
+++ b/egs/gigaspeech/ASR/zipformer/ctc_decode.py
@@ -668,7 +668,7 @@ def main():
         H = None
         bpe_model = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
@@ -707,7 +707,7 @@ def main():
                 torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
         else:
             logging.info("Loading pre-compiled G_4_gram.pt")
-            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
+            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False)
             G = k2.Fsa.from_dict(d)
 
         if params.decoding_method == "whole-lattice-rescoring":
diff --git a/egs/gigaspeech/ASR/zipformer/decode.py b/egs/gigaspeech/ASR/zipformer/decode.py
index 3a0c71484..cbd54ad9e 100755
--- a/egs/gigaspeech/ASR/zipformer/decode.py
+++ b/egs/gigaspeech/ASR/zipformer/decode.py
@@ -1000,7 +1000,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/gigaspeech/KWS/zipformer/decode-asr.py b/egs/gigaspeech/KWS/zipformer/decode-asr.py
index 149b8bed0..9d1c36466 100755
--- a/egs/gigaspeech/KWS/zipformer/decode-asr.py
+++ b/egs/gigaspeech/KWS/zipformer/decode-asr.py
@@ -1001,7 +1001,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/gigaspeech/KWS/zipformer/finetune.py b/egs/gigaspeech/KWS/zipformer/finetune.py
index a7ba56127..91ed7c093 100755
--- a/egs/gigaspeech/KWS/zipformer/finetune.py
+++ b/egs/gigaspeech/KWS/zipformer/finetune.py
@@ -183,7 +183,7 @@ def load_model_params(
 
     """
     logging.info(f"Loading checkpoint from {ckpt}")
-    checkpoint = torch.load(ckpt, map_location="cpu")
+    checkpoint = torch.load(ckpt, map_location="cpu", weights_only=False)
 
     # if module list is empty, load the whole model from ckpt
     if not init_modules:
diff --git a/egs/ksponspeech/ASR/pruned_transducer_stateless7_streaming/decode.py b/egs/ksponspeech/ASR/pruned_transducer_stateless7_streaming/decode.py
index 0f3f1c1ab..c82b910bb 100755
--- a/egs/ksponspeech/ASR/pruned_transducer_stateless7_streaming/decode.py
+++ b/egs/ksponspeech/ASR/pruned_transducer_stateless7_streaming/decode.py
@@ -938,7 +938,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/ksponspeech/ASR/zipformer/ctc_decode.py b/egs/ksponspeech/ASR/zipformer/ctc_decode.py
index 30bf1610b..10239db5e 100755
--- a/egs/ksponspeech/ASR/zipformer/ctc_decode.py
+++ b/egs/ksponspeech/ASR/zipformer/ctc_decode.py
@@ -666,7 +666,7 @@ def main():
         H = None
         bpe_model = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
@@ -705,7 +705,7 @@ def main():
                 torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
         else:
             logging.info("Loading pre-compiled G_4_gram.pt")
-            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
+            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False)
             G = k2.Fsa.from_dict(d)
 
         if params.decoding_method == "whole-lattice-rescoring":
diff --git a/egs/ksponspeech/ASR/zipformer/decode.py b/egs/ksponspeech/ASR/zipformer/decode.py
index 5c21abb79..ba0383010 100755
--- a/egs/ksponspeech/ASR/zipformer/decode.py
+++ b/egs/ksponspeech/ASR/zipformer/decode.py
@@ -989,7 +989,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/libricss/SURT/dprnn_zipformer/pretrained.py b/egs/libricss/SURT/dprnn_zipformer/pretrained.py
index 5f9468957..73468417a 100755
--- a/egs/libricss/SURT/dprnn_zipformer/pretrained.py
+++ b/egs/libricss/SURT/dprnn_zipformer/pretrained.py
@@ -177,7 +177,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/libricss/SURT/dprnn_zipformer/train.py b/egs/libricss/SURT/dprnn_zipformer/train.py
index 148cafd4b..186d4f6fb 100755
--- a/egs/libricss/SURT/dprnn_zipformer/train.py
+++ b/egs/libricss/SURT/dprnn_zipformer/train.py
@@ -1286,7 +1286,7 @@ def run(rank, world_size, args):
         logging.info(
             f"Initializing model with checkpoint from {params.model_init_ckpt}"
         )
-        init_ckpt = torch.load(params.model_init_ckpt, map_location=device)
+        init_ckpt = torch.load(params.model_init_ckpt, map_location=device, weights_only=False)
         model.load_state_dict(init_ckpt["model"], strict=False)
 
     if world_size > 1:
diff --git a/egs/libricss/SURT/dprnn_zipformer/train_adapt.py b/egs/libricss/SURT/dprnn_zipformer/train_adapt.py
index 8c37430ec..4d1f3cf02 100755
--- a/egs/libricss/SURT/dprnn_zipformer/train_adapt.py
+++ b/egs/libricss/SURT/dprnn_zipformer/train_adapt.py
@@ -1175,7 +1175,7 @@ def run(rank, world_size, args):
         logging.info(
             f"Initializing model with checkpoint from {params.model_init_ckpt}"
         )
-        init_ckpt = torch.load(params.model_init_ckpt, map_location=device)
+        init_ckpt = torch.load(params.model_init_ckpt, map_location=device, weights_only=False)
         model.load_state_dict(init_ckpt["model"], strict=True)
 
     if world_size > 1:
diff --git a/egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py b/egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py
index 458109a3f..763bb8b51 100644
--- a/egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py
+++ b/egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py
@@ -252,7 +252,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/librilight/SSL/zipformer/decode.py b/egs/librilight/SSL/zipformer/decode.py
index 95643c5e1..88b67600b 100644
--- a/egs/librilight/SSL/zipformer/decode.py
+++ b/egs/librilight/SSL/zipformer/decode.py
@@ -960,7 +960,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/librilight/SSL/zipformer/finetune.py b/egs/librilight/SSL/zipformer/finetune.py
index 50dbd5f2d..793725614 100644
--- a/egs/librilight/SSL/zipformer/finetune.py
+++ b/egs/librilight/SSL/zipformer/finetune.py
@@ -750,7 +750,7 @@ def _to_int_tuple(s: str):
 def get_encoder_model(params: AttributeDict) -> nn.Module:
     if hasattr(params, "pretrained_dir"):
         logging.info(f"Loading {params.pretrained_dir}")
-        pretrained = torch.load(params.pretrained_dir)
+        pretrained = torch.load(params.pretrained_dir, weights_only=False)
         encoder = HubertModel(params)
         encoder.load_state_dict(pretrained["model"])
     else:
diff --git a/egs/librispeech/ASR/local/prepare_lang.py b/egs/librispeech/ASR/local/prepare_lang.py
index d913756a1..82785ad6e 100755
--- a/egs/librispeech/ASR/local/prepare_lang.py
+++ b/egs/librispeech/ASR/local/prepare_lang.py
@@ -28,7 +28,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py
index e06404619..e1b9779a0 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py
@@ -264,7 +264,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/librispeech/ASR/transducer_stateless/pretrained.py b/egs/librispeech/ASR/transducer_stateless/pretrained.py
index 3b86e319e..c5c58f140 100755
--- a/egs/librispeech/ASR/transducer_stateless/pretrained.py
+++ b/egs/librispeech/ASR/transducer_stateless/pretrained.py
@@ -234,7 +234,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/librispeech/ASR/transducer_stateless2/pretrained.py b/egs/librispeech/ASR/transducer_stateless2/pretrained.py
index 2de4182f1..9f9159cea 100755
--- a/egs/librispeech/ASR/transducer_stateless2/pretrained.py
+++ b/egs/librispeech/ASR/transducer_stateless2/pretrained.py
@@ -234,7 +234,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py b/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py
index 83094ea51..973205078 100755
--- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py
+++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py
@@ -234,7 +234,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/librispeech/SSL/hubert/decode.py b/egs/librispeech/SSL/hubert/decode.py
index 837061b8c..f13f8dc9a 100644
--- a/egs/librispeech/SSL/hubert/decode.py
+++ b/egs/librispeech/SSL/hubert/decode.py
@@ -962,7 +962,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/librispeech/SSL/hubert/decode_ce.py b/egs/librispeech/SSL/hubert/decode_ce.py
index a8d8bc9c2..9529ce627 100644
--- a/egs/librispeech/SSL/hubert/decode_ce.py
+++ b/egs/librispeech/SSL/hubert/decode_ce.py
@@ -962,7 +962,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/librispeech/SSL/hubert/finetune.py b/egs/librispeech/SSL/hubert/finetune.py
index 0080513f3..ea92b3947 100644
--- a/egs/librispeech/SSL/hubert/finetune.py
+++ b/egs/librispeech/SSL/hubert/finetune.py
@@ -451,7 +451,7 @@ def _to_int_tuple(s: str):
 def get_encoder_model(params: AttributeDict) -> nn.Module:
     if hasattr(params, "pretrained_dir"):
         logging.info(f"Loading {params.pretrained_dir}")
-        pretrained = torch.load(params.pretrained_dir)
+        pretrained = torch.load(params.pretrained_dir, weights_only=False)
         encoder = HubertModel(params)
         encoder.load_state_dict(pretrained["model"])
     else:
diff --git a/egs/librispeech/SSL/hubert/finetune_ce.py b/egs/librispeech/SSL/hubert/finetune_ce.py
index 1ff2b03c0..1c1dc25a5 100644
--- a/egs/librispeech/SSL/hubert/finetune_ce.py
+++ b/egs/librispeech/SSL/hubert/finetune_ce.py
@@ -451,7 +451,7 @@ def _to_int_tuple(s: str):
 def get_encoder_model(params: AttributeDict) -> nn.Module:
     if hasattr(params, "pretrained_dir"):
         logging.info(f"Loading {params.pretrained_dir}")
-        pretrained = torch.load(params.pretrained_dir)
+        pretrained = torch.load(params.pretrained_dir, weights_only=False)
         encoder = HubertModel(params)
         encoder.load_state_dict(pretrained["model"])
     else:
diff --git a/egs/librispeech/SSL/local/convert_checkpoint_from_fairseq.py b/egs/librispeech/SSL/local/convert_checkpoint_from_fairseq.py
index 4212cd9c6..d048e15e2 100644
--- a/egs/librispeech/SSL/local/convert_checkpoint_from_fairseq.py
+++ b/egs/librispeech/SSL/local/convert_checkpoint_from_fairseq.py
@@ -12,7 +12,7 @@ args = parser.parse_args()
 src = args.src
 tgt = args.tgt
 
-old_checkpoint = torch.load(src)
+old_checkpoint = torch.load(src, weights_only=False)
 new_checkpoint = OrderedDict()
 new_checkpoint["model"] = old_checkpoint["model"]
 torch.save(new_checkpoint, tgt)
diff --git a/egs/librispeech/SSL/local/prepare_lang.py b/egs/librispeech/SSL/local/prepare_lang.py
index c8cf9b881..aa23c4cb3 100644
--- a/egs/librispeech/SSL/local/prepare_lang.py
+++ b/egs/librispeech/SSL/local/prepare_lang.py
@@ -28,7 +28,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/librispeech/SSL/zipformer/decode.py b/egs/librispeech/SSL/zipformer/decode.py
index 1562c28b8..9f385ea68 100644
--- a/egs/librispeech/SSL/zipformer/decode.py
+++ b/egs/librispeech/SSL/zipformer/decode.py
@@ -960,7 +960,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/librispeech/SSL/zipformer/finetune.py b/egs/librispeech/SSL/zipformer/finetune.py
index 5bebf60f0..8b044fbb5 100644
--- a/egs/librispeech/SSL/zipformer/finetune.py
+++ b/egs/librispeech/SSL/zipformer/finetune.py
@@ -750,7 +750,7 @@ def _to_int_tuple(s: str):
 def get_encoder_model(params: AttributeDict) -> nn.Module:
     if hasattr(params, "pretrained_dir"):
         logging.info(f"Loading {params.pretrained_dir}")
-        pretrained = torch.load(params.pretrained_dir)
+        pretrained = torch.load(params.pretrained_dir, weights_only=False)
         encoder = HubertModel(params)
         encoder.load_state_dict(pretrained["model"])
     else:
diff --git a/egs/librispeech/WSASR/conformer_ctc2/decode.py b/egs/librispeech/WSASR/conformer_ctc2/decode.py
index 3fa045533..822df6722 100755
--- a/egs/librispeech/WSASR/conformer_ctc2/decode.py
+++ b/egs/librispeech/WSASR/conformer_ctc2/decode.py
@@ -578,7 +578,7 @@ def main():
         H = None
         bpe_model = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
diff --git a/egs/librispeech/WSASR/conformer_ctc2/decode_phone.py b/egs/librispeech/WSASR/conformer_ctc2/decode_phone.py
index b6b1cb020..95b57b8e8 100755
--- a/egs/librispeech/WSASR/conformer_ctc2/decode_phone.py
+++ b/egs/librispeech/WSASR/conformer_ctc2/decode_phone.py
@@ -457,7 +457,7 @@ def main():
 
     params.num_classes = num_classes
 
-    HLG = k2.Fsa.from_dict(torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu"))
+    HLG = k2.Fsa.from_dict(torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu", weights_only=False))
     HLG = HLG.to(device)
     assert HLG.requires_grad is False
 
diff --git a/egs/librispeech/WSASR/local/compile_hlg.py b/egs/librispeech/WSASR/local/compile_hlg.py
index 63791f4cc..645826974 100755
--- a/egs/librispeech/WSASR/local/compile_hlg.py
+++ b/egs/librispeech/WSASR/local/compile_hlg.py
@@ -78,11 +78,11 @@ def compile_HLG(lm_dir: str, lang_dir: str, lm: str = "G_3_gram") -> k2.Fsa:
     max_token_id = max(lexicon.tokens)
     logging.info(f"Building ctc_topo. max_token_id: {max_token_id}")
     H = k2.ctc_topo(max_token_id)
-    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
+    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))
 
     if Path(f"{lm_dir}/{lm}.pt").is_file():
         logging.info(f"Loading pre-compiled {lm}")
-        d = torch.load(f"{lm_dir}/{lm}.pt")
+        d = torch.load(f"{lm_dir}/{lm}.pt", weights_only=False)
         G = k2.Fsa.from_dict(d)
     else:
         logging.info(f"Loading {lm}.fst.txt")
diff --git a/egs/librispeech/WSASR/local/prepare_lang.py b/egs/librispeech/WSASR/local/prepare_lang.py
index d913756a1..82785ad6e 100755
--- a/egs/librispeech/WSASR/local/prepare_lang.py
+++ b/egs/librispeech/WSASR/local/prepare_lang.py
@@ -28,7 +28,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/librispeech/WSASR/local/prepare_otc_lang.py b/egs/librispeech/WSASR/local/prepare_otc_lang.py
index 01865b865..cfd8a18cd 100755
--- a/egs/librispeech/WSASR/local/prepare_otc_lang.py
+++ b/egs/librispeech/WSASR/local/prepare_otc_lang.py
@@ -29,7 +29,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/libritts/ASR/zipformer/ctc_decode.py b/egs/libritts/ASR/zipformer/ctc_decode.py
index d77aa5962..bd360b74f 100755
--- a/egs/libritts/ASR/zipformer/ctc_decode.py
+++ b/egs/libritts/ASR/zipformer/ctc_decode.py
@@ -802,7 +802,7 @@ def main():
         H = None
         bpe_model = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
@@ -842,7 +842,7 @@ def main():
                 torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
         else:
             logging.info("Loading pre-compiled G_4_gram.pt")
-            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
+            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False)
             G = k2.Fsa.from_dict(d)
 
         if params.decoding_method in [
diff --git a/egs/libritts/ASR/zipformer/decode.py b/egs/libritts/ASR/zipformer/decode.py
index 759d9d50a..484a3b0a7 100755
--- a/egs/libritts/ASR/zipformer/decode.py
+++ b/egs/libritts/ASR/zipformer/decode.py
@@ -1014,7 +1014,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/ljspeech/TTS/matcha/hifigan/xutils.py b/egs/ljspeech/TTS/matcha/hifigan/xutils.py
index eefadcb7a..2c0d00823 100644
--- a/egs/ljspeech/TTS/matcha/hifigan/xutils.py
+++ b/egs/ljspeech/TTS/matcha/hifigan/xutils.py
@@ -41,7 +41,7 @@ def get_padding(kernel_size, dilation=1):
 def load_checkpoint(filepath, device):
     assert os.path.isfile(filepath)
     print(f"Loading '{filepath}'")
-    checkpoint_dict = torch.load(filepath, map_location=device)
+    checkpoint_dict = torch.load(filepath, map_location=device, weights_only=False)
     print("Complete.")
     return checkpoint_dict
 
diff --git a/egs/ljspeech/TTS/matcha/infer.py b/egs/ljspeech/TTS/matcha/infer.py
index 0b221d5c5..8ccd35264 100755
--- a/egs/ljspeech/TTS/matcha/infer.py
+++ b/egs/ljspeech/TTS/matcha/infer.py
@@ -103,7 +103,7 @@ def load_vocoder(checkpoint_path: Path) -> nn.Module:
 
     hifigan = HiFiGAN(h).to("cpu")
     hifigan.load_state_dict(
-        torch.load(checkpoint_path, map_location="cpu")["generator"]
+        torch.load(checkpoint_path, map_location="cpu", weights_only=False)["generator"]
     )
     _ = hifigan.eval()
     hifigan.remove_weight_norm()
diff --git a/egs/mdcc/ASR/zipformer/decode.py b/egs/mdcc/ASR/zipformer/decode.py
index ce104baf7..d2ae26409 100755
--- a/egs/mdcc/ASR/zipformer/decode.py
+++ b/egs/mdcc/ASR/zipformer/decode.py
@@ -756,7 +756,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/mgb2/ASR/conformer_ctc/decode.py b/egs/mgb2/ASR/conformer_ctc/decode.py
index f771d7f1e..26e470bd7 100755
--- a/egs/mgb2/ASR/conformer_ctc/decode.py
+++ b/egs/mgb2/ASR/conformer_ctc/decode.py
@@ -575,7 +575,7 @@ def main():
         H = None
         bpe_model = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
@@ -614,7 +614,7 @@ def main():
                 torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
         else:
             logging.info("Loading pre-compiled G_4_gram.pt")
-            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
+            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False)
             G = k2.Fsa.from_dict(d)
 
         if params.method in ["whole-lattice-rescoring", "attention-decoder"]:
diff --git a/egs/mgb2/ASR/conformer_ctc/pretrained.py b/egs/mgb2/ASR/conformer_ctc/pretrained.py
index 0ab2af527..8a3655bf6 100755
--- a/egs/mgb2/ASR/conformer_ctc/pretrained.py
+++ b/egs/mgb2/ASR/conformer_ctc/pretrained.py
@@ -275,7 +275,7 @@ def main():
         use_feat_batchnorm=params.use_feat_batchnorm,
     )
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
@@ -347,7 +347,7 @@ def main():
         "attention-decoder",
     ]:
         logging.info(f"Loading HLG from {params.HLG}")
-        HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
+        HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu", weights_only=False))
         HLG = HLG.to(device)
         if not hasattr(HLG, "lm_scores"):
             # For whole-lattice-rescoring and attention-decoder
@@ -358,7 +358,7 @@ def main():
             "attention-decoder",
         ]:
             logging.info(f"Loading G from {params.G}")
-            G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
+            G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu", weights_only=False))
             # Add epsilon self-loops to G as we will compose
             # it with the whole lattice later
             G = G.to(device)
diff --git a/egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py b/egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py
index 81a16f0ff..639099f8a 100755
--- a/egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py
+++ b/egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py
@@ -236,7 +236,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/multi_ja_en/ASR/zipformer/decode.py b/egs/multi_ja_en/ASR/zipformer/decode.py
index 26ce3e018..9acccfcf7 100755
--- a/egs/multi_ja_en/ASR/zipformer/decode.py
+++ b/egs/multi_ja_en/ASR/zipformer/decode.py
@@ -733,7 +733,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/multi_zh-hans/ASR/whisper/decode.py b/egs/multi_zh-hans/ASR/whisper/decode.py
index f758f546c..5b9665c5a 100755
--- a/egs/multi_zh-hans/ASR/whisper/decode.py
+++ b/egs/multi_zh-hans/ASR/whisper/decode.py
@@ -90,10 +90,10 @@ def average_checkpoints(
     """
     n = len(filenames)
 
-    if "model" in torch.load(filenames[0], map_location=device):
-        avg = torch.load(filenames[0], map_location=device)["model"]
+    if "model" in torch.load(filenames[0], map_location=device, weights_only=False):
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)["model"]
     else:
-        avg = torch.load(filenames[0], map_location=device)
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)
 
     # Identify shared parameters. Two parameters are said to be shared
     # if they have the same data_ptr
@@ -108,10 +108,10 @@ def average_checkpoints(
     uniqued_names = list(uniqued.values())
 
     for i in range(1, n):
-        if "model" in torch.load(filenames[i], map_location=device):
-            state_dict = torch.load(filenames[i], map_location=device)["model"]
+        if "model" in torch.load(filenames[i], map_location=device, weights_only=False):
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)["model"]
         else:
-            state_dict = torch.load(filenames[i], map_location=device)
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)
         for k in uniqued_names:
             avg[k] += state_dict[k]
 
@@ -484,7 +484,7 @@ def main():
             start = params.epoch - params.avg
             assert start >= 1, start
             checkpoint = torch.load(
-                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu"
+                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu", weights_only=False
             )
             if "model" not in checkpoint:
                 # deepspeed converted checkpoint only contains model state_dict
@@ -513,7 +513,7 @@ def main():
             torch.save(model.state_dict(), filename)
         else:
             checkpoint = torch.load(
-                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu"
+                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu", weights_only=False
             )
             if "model" not in checkpoint:
                 model.load_state_dict(checkpoint, strict=True)
diff --git a/egs/multi_zh-hans/ASR/whisper/train.py b/egs/multi_zh-hans/ASR/whisper/train.py
index fe2d950c1..3ffaef212 100755
--- a/egs/multi_zh-hans/ASR/whisper/train.py
+++ b/egs/multi_zh-hans/ASR/whisper/train.py
@@ -809,7 +809,7 @@ def run(rank, world_size, args):
     del model.alignment_heads
 
     if params.pretrained_model_path:
-        checkpoint = torch.load(params.pretrained_model_path, map_location="cpu")
+        checkpoint = torch.load(params.pretrained_model_path, map_location="cpu", weights_only=False)
         if "model" not in checkpoint:
             model.load_state_dict(checkpoint, strict=True)
         else:
diff --git a/egs/multi_zh-hans/ASR/zipformer/decode.py b/egs/multi_zh-hans/ASR/zipformer/decode.py
index a1d018cd2..e2f7bd678 100755
--- a/egs/multi_zh-hans/ASR/zipformer/decode.py
+++ b/egs/multi_zh-hans/ASR/zipformer/decode.py
@@ -784,7 +784,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/multi_zh-hans/ASR/zipformer/generate_averaged_model.py b/egs/multi_zh-hans/ASR/zipformer/generate_averaged_model.py
index 68111fad7..0164456b3 100755
--- a/egs/multi_zh-hans/ASR/zipformer/generate_averaged_model.py
+++ b/egs/multi_zh-hans/ASR/zipformer/generate_averaged_model.py
@@ -24,7 +24,7 @@ Usage:
     --exp-dir ./zipformer/exp
 
 It will generate a file `epoch-28-avg-15.pt` in the given `exp_dir`.
-You can later load it by `torch.load("epoch-28-avg-15.pt")`.
+You can later load it by `torch.load("epoch-28-avg-15.pt", weights_only=False)`.
 
 (2) use the checkpoint exp_dir/checkpoint-iter.pt
 ./zipformer/generate_averaged_model.py \
@@ -33,7 +33,7 @@ You can later load it by `torch.load("epoch-28-avg-15.pt")`.
     --exp-dir ./zipformer/exp
 
 It will generate a file `iter-22000-avg-5.pt` in the given `exp_dir`.
-You can later load it by `torch.load("iter-22000-avg-5.pt")`.
+You can later load it by `torch.load("iter-22000-avg-5.pt", weights_only=False)`.
 """
 
 
diff --git a/egs/multi_zh-hans/ASR/zipformer/pretrained.py b/egs/multi_zh-hans/ASR/zipformer/pretrained.py
index 1b53465c0..53be57fae 100755
--- a/egs/multi_zh-hans/ASR/zipformer/pretrained.py
+++ b/egs/multi_zh-hans/ASR/zipformer/pretrained.py
@@ -291,7 +291,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/multi_zh_en/ASR/zipformer/decode.py b/egs/multi_zh_en/ASR/zipformer/decode.py
index e21e8f052..b5b87af41 100755
--- a/egs/multi_zh_en/ASR/zipformer/decode.py
+++ b/egs/multi_zh_en/ASR/zipformer/decode.py
@@ -792,7 +792,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/multi_zh_en/ASR/zipformer/generate_averaged_model.py b/egs/multi_zh_en/ASR/zipformer/generate_averaged_model.py
index 68111fad7..0164456b3 100755
--- a/egs/multi_zh_en/ASR/zipformer/generate_averaged_model.py
+++ b/egs/multi_zh_en/ASR/zipformer/generate_averaged_model.py
@@ -24,7 +24,7 @@ Usage:
     --exp-dir ./zipformer/exp
 
 It will generate a file `epoch-28-avg-15.pt` in the given `exp_dir`.
-You can later load it by `torch.load("epoch-28-avg-15.pt")`.
+You can later load it by `torch.load("epoch-28-avg-15.pt", weights_only=False)`.
 
 (2) use the checkpoint exp_dir/checkpoint-iter.pt
 ./zipformer/generate_averaged_model.py \
@@ -33,7 +33,7 @@ You can later load it by `torch.load("epoch-28-avg-15.pt")`.
     --exp-dir ./zipformer/exp
 
 It will generate a file `iter-22000-avg-5.pt` in the given `exp_dir`.
-You can later load it by `torch.load("iter-22000-avg-5.pt")`.
+You can later load it by `torch.load("iter-22000-avg-5.pt", weights_only=False)`.
 """
 
 
diff --git a/egs/multi_zh_en/ASR/zipformer/pretrained.py b/egs/multi_zh_en/ASR/zipformer/pretrained.py
index 2fcde550b..0f8de5020 100755
--- a/egs/multi_zh_en/ASR/zipformer/pretrained.py
+++ b/egs/multi_zh_en/ASR/zipformer/pretrained.py
@@ -294,7 +294,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/ptb/LM/local/sort_lm_training_data.py b/egs/ptb/LM/local/sort_lm_training_data.py
index bed3856e4..dcc888de8 100755
--- a/egs/ptb/LM/local/sort_lm_training_data.py
+++ b/egs/ptb/LM/local/sort_lm_training_data.py
@@ -64,7 +64,7 @@ def main():
     if out_lm_data.is_file():
         logging.warning(f"{out_lm_data} exists - skipping")
         return
-    data = torch.load(in_lm_data)
+    data = torch.load(in_lm_data, weights_only=False)
     words2bpe = data["words"]
     sentences = data["sentences"]
     sentence_lengths = data["sentence_lengths"]
diff --git a/egs/ptb/LM/local/test_prepare_lm_training_data.py b/egs/ptb/LM/local/test_prepare_lm_training_data.py
index 3790045fa..aedca9d5e 100755
--- a/egs/ptb/LM/local/test_prepare_lm_training_data.py
+++ b/egs/ptb/LM/local/test_prepare_lm_training_data.py
@@ -37,7 +37,7 @@ def main():
     sp = spm.SentencePieceProcessor()
     sp.load(str(bpe_model))
 
-    data = torch.load(lm_training_data)
+    data = torch.load(lm_training_data, weights_only=False)
     words2bpe = data["words"]
     sentences = data["sentences"]
 
diff --git a/egs/reazonspeech/ASR/zipformer/decode.py b/egs/reazonspeech/ASR/zipformer/decode.py
index cdd2145f2..7b180bb02 100755
--- a/egs/reazonspeech/ASR/zipformer/decode.py
+++ b/egs/reazonspeech/ASR/zipformer/decode.py
@@ -1008,7 +1008,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py b/egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py
index 3036b471e..7c3901c20 100755
--- a/egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py
+++ b/egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py
@@ -95,10 +95,10 @@ def average_checkpoints(
     """
     n = len(filenames)
 
-    if "model" in torch.load(filenames[0], map_location=device):
-        avg = torch.load(filenames[0], map_location=device)["model"]
+    if "model" in torch.load(filenames[0], map_location=device, weights_only=False):
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)["model"]
     else:
-        avg = torch.load(filenames[0], map_location=device)
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)
 
     # Identify shared parameters. Two parameters are said to be shared
     # if they have the same data_ptr
@@ -113,10 +113,10 @@ def average_checkpoints(
     uniqued_names = list(uniqued.values())
 
     for i in range(1, n):
-        if "model" in torch.load(filenames[i], map_location=device):
-            state_dict = torch.load(filenames[i], map_location=device)["model"]
+        if "model" in torch.load(filenames[i], map_location=device, weights_only=False):
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)["model"]
         else:
-            state_dict = torch.load(filenames[i], map_location=device)
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)
         for k in uniqued_names:
             avg[k] += state_dict[k]
 
@@ -548,7 +548,8 @@ def main():
         # torch.save(avg_checkpoint, filename)
     else:
         checkpoint = torch.load(
             f"{params.exp_dir}/epoch-{params.epoch}/pytorch_model.bin",
             map_location="cpu",
+            weights_only=False,
         )
         model.load_state_dict(checkpoint, strict=False)
diff --git a/egs/speech_llm/ASR_LLM/whisper_llm_zh/train.py b/egs/speech_llm/ASR_LLM/whisper_llm_zh/train.py
index 7947a60a5..7162af958 100755
--- a/egs/speech_llm/ASR_LLM/whisper_llm_zh/train.py
+++ b/egs/speech_llm/ASR_LLM/whisper_llm_zh/train.py
@@ -652,7 +652,7 @@ def run(rank, world_size, args):
     )
 
     if params.pretrained_model_path:
-        checkpoint = torch.load(params.pretrained_model_path, map_location="cpu")
+        checkpoint = torch.load(params.pretrained_model_path, map_location="cpu", weights_only=False)
         missing_keys, unexpected_keys = model.load_state_dict(checkpoint, strict=False)
 
     num_param = sum([p.numel() for p in model.parameters()])
@@ -704,7 +704,7 @@ def run(rank, world_size, args):
 
     sampler_state_dict = None
     if params.sampler_state_dict_path:
-        sampler_state_dict = torch.load(params.sampler_state_dict_path)
+        sampler_state_dict = torch.load(params.sampler_state_dict_path, weights_only=False)
         sampler_state_dict["max_duration"] = params.max_duration
 
     train_dl = data_module.train_dataloaders(
diff --git a/egs/speechio/ASR/whisper/decode.py b/egs/speechio/ASR/whisper/decode.py
index c20f1f714..9ee3ecd04 100644
--- a/egs/speechio/ASR/whisper/decode.py
+++ b/egs/speechio/ASR/whisper/decode.py
@@ -91,10 +91,10 @@ def average_checkpoints(
     """
     n = len(filenames)
 
-    if "model" in torch.load(filenames[0], map_location=device):
-        avg = torch.load(filenames[0], map_location=device)["model"]
+    if "model" in torch.load(filenames[0], map_location=device, weights_only=False):
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)["model"]
     else:
-        avg = torch.load(filenames[0], map_location=device)
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)
 
     # Identify shared parameters. Two parameters are said to be shared
     # if they have the same data_ptr
@@ -109,10 +109,10 @@ def average_checkpoints(
     uniqued_names = list(uniqued.values())
 
     for i in range(1, n):
-        if "model" in torch.load(filenames[i], map_location=device):
-            state_dict = torch.load(filenames[i], map_location=device)["model"]
+        if "model" in torch.load(filenames[i], map_location=device, weights_only=False):
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)["model"]
         else:
-            state_dict = torch.load(filenames[i], map_location=device)
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)
         for k in uniqued_names:
             avg[k] += state_dict[k]
 
@@ -447,7 +447,7 @@ def main():
             start = params.epoch - params.avg
             assert start >= 1, start
             checkpoint = torch.load(
-                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu"
+                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu", weights_only=False
             )
             if "model" not in checkpoint:
                 # deepspeed converted checkpoint only contains model state_dict
@@ -476,7 +476,7 @@ def main():
             torch.save(model.state_dict(), filename)
         else:
             checkpoint = torch.load(
-                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu"
+                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu", weights_only=False
             )
             if "model" not in checkpoint:
                 model.load_state_dict(checkpoint, strict=True)
diff --git a/egs/speechio/ASR/zipformer/decode.py b/egs/speechio/ASR/zipformer/decode.py
index ffdd7b500..62a7e8943 100644
--- a/egs/speechio/ASR/zipformer/decode.py
+++ b/egs/speechio/ASR/zipformer/decode.py
@@ -784,7 +784,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/spgispeech/ASR/zipformer/decode.py b/egs/spgispeech/ASR/zipformer/decode.py
index 90d318919..7cc23d1f0 100755
--- a/egs/spgispeech/ASR/zipformer/decode.py
+++ b/egs/spgispeech/ASR/zipformer/decode.py
@@ -988,7 +988,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/spgispeech/ASR/zipformer/pretrained.py b/egs/spgispeech/ASR/zipformer/pretrained.py
index a562fb9f6..a2f8e5544 100755
--- a/egs/spgispeech/ASR/zipformer/pretrained.py
+++ b/egs/spgispeech/ASR/zipformer/pretrained.py
@@ -291,7 +291,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/swbd/ASR/conformer_ctc/decode.py b/egs/swbd/ASR/conformer_ctc/decode.py
index 52e501ae1..9e28043ab 100755
--- a/egs/swbd/ASR/conformer_ctc/decode.py
+++ b/egs/swbd/ASR/conformer_ctc/decode.py
@@ -698,7 +698,7 @@ def main():
         H = None
         bpe_model = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
@@ -738,7 +738,7 @@ def main():
                 torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
         else:
             logging.info("Loading pre-compiled G_4_gram.pt")
-            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
+            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False)
             G = k2.Fsa.from_dict(d)
 
         if params.method in [
diff --git a/egs/swbd/ASR/local/sort_lm_training_data.py b/egs/swbd/ASR/local/sort_lm_training_data.py
index bed3856e4..dcc888de8 100755
--- a/egs/swbd/ASR/local/sort_lm_training_data.py
+++ b/egs/swbd/ASR/local/sort_lm_training_data.py
@@ -64,7 +64,7 @@ def main():
     if out_lm_data.is_file():
         logging.warning(f"{out_lm_data} exists - skipping")
         return
-    data = torch.load(in_lm_data)
+    data = torch.load(in_lm_data, weights_only=False)
     words2bpe = data["words"]
     sentences = data["sentences"]
     sentence_lengths = data["sentence_lengths"]
diff --git a/egs/tal_csasr/ASR/local/prepare_lang.py b/egs/tal_csasr/ASR/local/prepare_lang.py
index c8cf9b881..aa23c4cb3 100755
--- a/egs/tal_csasr/ASR/local/prepare_lang.py
+++ b/egs/tal_csasr/ASR/local/prepare_lang.py
@@ -28,7 +28,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py b/egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py
index 8a74ee745..098ea3f4c 100755
--- a/egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py
+++ b/egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py
@@ -235,7 +235,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/decode.py b/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/decode.py
index 885778965..f4361b528 100755
--- a/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/decode.py
+++ b/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/decode.py
@@ -766,7 +766,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/pretrained.py b/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/pretrained.py
index 6e07b5949..21d80bfef 100755
--- a/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/pretrained.py
+++ b/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/pretrained.py
@@ -248,7 +248,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/tedlium3/ASR/conformer_ctc2/decode.py b/egs/tedlium3/ASR/conformer_ctc2/decode.py
index 28d39de70..220c7a6c1 100755
--- a/egs/tedlium3/ASR/conformer_ctc2/decode.py
+++ b/egs/tedlium3/ASR/conformer_ctc2/decode.py
@@ -675,7 +675,7 @@ def main() -> None:
         H = None
         bpe_model = None
         HLG = k2.Fsa.from_dict(
-            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
+            torch.load(f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False)
         )
         assert HLG.requires_grad is False
 
@@ -687,7 +687,7 @@ def main() -> None:
 
         if params.lm_path.is_file() and params.lm_path.suffix == ".pt":
             logging.info(f"Loading pre-compiled {params.lm_path.name}")
-            d = torch.load(params.lm_path, map_location=device)
+            d = torch.load(params.lm_path, map_location=device, weights_only=False)
             G = k2.Fsa.from_dict(d)
         elif not params.lm_path.is_file() and params.lm_path.suffix == ".txt":
             raise FileNotFoundError(f"No such language model file: '{params.lm_path}'")
diff --git a/egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py b/egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py
index 9e58fed00..f0a32a993 100644
--- a/egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py
+++ b/egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py
@@ -238,7 +238,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/tedlium3/ASR/transducer_stateless/pretrained.py b/egs/tedlium3/ASR/transducer_stateless/pretrained.py
index 5300fe764..73e18e20d 100644
--- a/egs/tedlium3/ASR/transducer_stateless/pretrained.py
+++ b/egs/tedlium3/ASR/transducer_stateless/pretrained.py
@@ -257,7 +257,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/tedlium3/ASR/zipformer/decode.py b/egs/tedlium3/ASR/zipformer/decode.py
index 2c4123c20..7f8a7ef3e 100755
--- a/egs/tedlium3/ASR/zipformer/decode.py
+++ b/egs/tedlium3/ASR/zipformer/decode.py
@@ -784,7 +784,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/timit/ASR/local/compile_hlg.py b/egs/timit/ASR/local/compile_hlg.py
index c8562f4fb..150fcbc60 100755
--- a/egs/timit/ASR/local/compile_hlg.py
+++ b/egs/timit/ASR/local/compile_hlg.py
@@ -63,11 +63,11 @@ def compile_HLG(lang_dir: str) -> k2.Fsa:
     max_token_id = max(lexicon.tokens)
     logging.info(f"Building ctc_topo. max_token_id: {max_token_id}")
     H = k2.ctc_topo(max_token_id)
-    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
+    L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))
 
     if Path("data/lm/G.pt").is_file():
         logging.info("Loading pre-compiled G")
-        d = torch.load("data/lm/G.pt")
+        d = torch.load("data/lm/G.pt", weights_only=False)
         G = k2.Fsa.from_dict(d)
     else:
         logging.info("Loading G_3_gram.fst.txt")
diff --git a/egs/timit/ASR/local/prepare_lang.py b/egs/timit/ASR/local/prepare_lang.py
index e9f283274..d5087ca67 100755
--- a/egs/timit/ASR/local/prepare_lang.py
+++ b/egs/timit/ASR/local/prepare_lang.py
@@ -29,7 +29,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/egs/timit/ASR/tdnn_ligru_ctc/decode.py b/egs/timit/ASR/tdnn_ligru_ctc/decode.py
index 4beeed18c..541ff09a0 100644
--- a/egs/timit/ASR/tdnn_ligru_ctc/decode.py
+++ b/egs/timit/ASR/tdnn_ligru_ctc/decode.py
@@ -398,7 +398,7 @@ def main():
 
     logging.info(f"device: {device}")
 
-    HLG = k2.Fsa.from_dict(torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu"))
+    HLG = k2.Fsa.from_dict(torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu", weights_only=False))
     HLG = HLG.to(device)
     assert HLG.requires_grad is False
 
@@ -424,7 +424,7 @@ def main():
                 torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
         else:
             logging.info("Loading pre-compiled G_4_gram.pt")
-            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location="cpu")
+            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location="cpu", weights_only=False)
             G = k2.Fsa.from_dict(d).to(device)
 
         if params.method == "whole-lattice-rescoring":
diff --git a/egs/timit/ASR/tdnn_ligru_ctc/pretrained.py b/egs/timit/ASR/tdnn_ligru_ctc/pretrained.py
index 0d77bc512..78b17558c 100644
--- a/egs/timit/ASR/tdnn_ligru_ctc/pretrained.py
+++ b/egs/timit/ASR/tdnn_ligru_ctc/pretrained.py
@@ -167,13 +167,13 @@ def main():
         subsampling_factor=params.subsampling_factor,
     )
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"])
     model.to(device)
     model.eval()
 
     logging.info(f"Loading HLG from {params.HLG}")
-    HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
+    HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu", weights_only=False))
     HLG = HLG.to(device)
     if not hasattr(HLG, "lm_scores"):
         # For whole-lattice-rescoring and attention-decoder
@@ -181,7 +181,7 @@ def main():
 
     if params.method == "whole-lattice-rescoring":
         logging.info(f"Loading G from {params.G}")
-        G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
+        G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu", weights_only=False))
         # Add epsilon self-loops to G as we will compose
         # it with the whole lattice later
         G = G.to(device)
diff --git a/egs/timit/ASR/tdnn_lstm_ctc/decode.py b/egs/timit/ASR/tdnn_lstm_ctc/decode.py
index 502a48def..f3eebcc61 100644
--- a/egs/timit/ASR/tdnn_lstm_ctc/decode.py
+++ b/egs/timit/ASR/tdnn_lstm_ctc/decode.py
@@ -397,7 +397,7 @@ def main():
 
     logging.info(f"device: {device}")
 
-    HLG = k2.Fsa.from_dict(torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu"))
+    HLG = k2.Fsa.from_dict(torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu", weights_only=False))
     HLG = HLG.to(device)
     assert HLG.requires_grad is False
 
@@ -423,7 +423,7 @@ def main():
                 torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
         else:
             logging.info("Loading pre-compiled G_4_gram.pt")
-            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location="cpu")
+            d = torch.load(params.lm_dir / "G_4_gram.pt", map_location="cpu", weights_only=False)
             G = k2.Fsa.from_dict(d).to(device)
 
         if params.method == "whole-lattice-rescoring":
diff --git a/egs/timit/ASR/tdnn_lstm_ctc/pretrained.py b/egs/timit/ASR/tdnn_lstm_ctc/pretrained.py
index f06c8c211..a1e93b329 100644
--- a/egs/timit/ASR/tdnn_lstm_ctc/pretrained.py
+++ b/egs/timit/ASR/tdnn_lstm_ctc/pretrained.py
@@ -167,13 +167,13 @@ def main():
         subsampling_factor=params.subsampling_factor,
     )
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"])
     model.to(device)
     model.eval()
 
     logging.info(f"Loading HLG from {params.HLG}")
-    HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
+    HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu", weights_only=False))
     HLG = HLG.to(device)
     if not hasattr(HLG, "lm_scores"):
         # For whole-lattice-rescoring and attention-decoder
@@ -181,7 +181,7 @@ def main():
 
     if params.method == "whole-lattice-rescoring":
         logging.info(f"Loading G from {params.G}")
-        G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
+        G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu", weights_only=False))
         # Add epsilon self-loops to G as we will compose
         # it with the whole lattice later
         G = G.to(device)
diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/decode.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/decode.py
index 2bafe25d6..65afad8f0 100755
--- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/decode.py
+++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/decode.py
@@ -640,7 +640,7 @@ def main():
             lg_filename = params.lang_dir + "/LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/finetune.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/finetune.py
index c34f1593d..d03b5485c 100755
--- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/finetune.py
+++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/finetune.py
@@ -477,7 +477,7 @@ def load_model_params(
 
     """
     logging.info(f"Loading checkpoint from {ckpt}")
-    checkpoint = torch.load(ckpt, map_location="cpu")
+    checkpoint = torch.load(ckpt, map_location="cpu", weights_only=False)
 
     # if module list is empty, load the whole model from ckpt
     if not init_modules:
diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py
index 642de72d7..51c4c13c0 100755
--- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py
+++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py
@@ -220,7 +220,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py
index 17428e19d..f35042c07 100644
--- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py
+++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py
@@ -220,7 +220,7 @@ def main():
     logging.info("Creating model")
     model = get_transducer_model(params)
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/wenetspeech/ASR/whisper/decode.py b/egs/wenetspeech/ASR/whisper/decode.py
index 34b1c80ef..2363a6992 100755
--- a/egs/wenetspeech/ASR/whisper/decode.py
+++ b/egs/wenetspeech/ASR/whisper/decode.py
@@ -88,10 +88,10 @@ def average_checkpoints(
     """
     n = len(filenames)
 
-    if "model" in torch.load(filenames[0], map_location=device):
-        avg = torch.load(filenames[0], map_location=device)["model"]
+    if "model" in torch.load(filenames[0], map_location=device, weights_only=False):
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)["model"]
     else:
-        avg = torch.load(filenames[0], map_location=device)
+        avg = torch.load(filenames[0], map_location=device, weights_only=False)
 
     # Identify shared parameters. Two parameters are said to be shared
     # if they have the same data_ptr
@@ -106,10 +106,10 @@ def average_checkpoints(
     uniqued_names = list(uniqued.values())
 
     for i in range(1, n):
-        if "model" in torch.load(filenames[i], map_location=device):
-            state_dict = torch.load(filenames[i], map_location=device)["model"]
+        if "model" in torch.load(filenames[i], map_location=device, weights_only=False):
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)["model"]
         else:
-            state_dict = torch.load(filenames[i], map_location=device)
+            state_dict = torch.load(filenames[i], map_location=device, weights_only=False)
         for k in uniqued_names:
             avg[k] += state_dict[k]
 
@@ -435,7 +435,7 @@ def main():
             start = params.epoch - params.avg
             assert start >= 1, start
             checkpoint = torch.load(
-                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu"
+                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu", weights_only=False
             )
             if "model" not in checkpoint:
                 # deepspeed converted checkpoint only contains model state_dict
@@ -464,7 +464,7 @@ def main():
             torch.save(model.state_dict(), filename)
         else:
             checkpoint = torch.load(
-                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu"
+                f"{params.exp_dir}/epoch-{params.epoch}.pt", map_location="cpu", weights_only=False
             )
             if "model" not in checkpoint:
                 model.load_state_dict(checkpoint, strict=True)
diff --git a/egs/wenetspeech/ASR/zipformer/decode.py b/egs/wenetspeech/ASR/zipformer/decode.py
index 0fbc8244b..63d29b7fd 100755
--- a/egs/wenetspeech/ASR/zipformer/decode.py
+++ b/egs/wenetspeech/ASR/zipformer/decode.py
@@ -757,7 +757,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/wenetspeech/KWS/zipformer/decode-asr.py b/egs/wenetspeech/KWS/zipformer/decode-asr.py
index 6425030eb..34014facc 100755
--- a/egs/wenetspeech/KWS/zipformer/decode-asr.py
+++ b/egs/wenetspeech/KWS/zipformer/decode-asr.py
@@ -706,7 +706,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/wenetspeech/KWS/zipformer/finetune.py b/egs/wenetspeech/KWS/zipformer/finetune.py
index b1abfd79e..72e786864 100755
--- a/egs/wenetspeech/KWS/zipformer/finetune.py
+++ b/egs/wenetspeech/KWS/zipformer/finetune.py
@@ -216,7 +216,7 @@ def load_model_params(
 
     """
     logging.info(f"Loading checkpoint from {ckpt}")
-    checkpoint = torch.load(ckpt, map_location="cpu")
+    checkpoint = torch.load(ckpt, map_location="cpu", weights_only=False)
 
     # if module list is empty, load the whole model from ckpt
     if not init_modules:
diff --git a/egs/wenetspeech4tts/TTS/f5-tts/generate_averaged_model.py b/egs/wenetspeech4tts/TTS/f5-tts/generate_averaged_model.py
index f02358553..e3d3ff308 100644
--- a/egs/wenetspeech4tts/TTS/f5-tts/generate_averaged_model.py
+++ b/egs/wenetspeech4tts/TTS/f5-tts/generate_averaged_model.py
@@ -25,7 +25,7 @@ python3 bin/generate_averaged_model.py \
     --exp-dir ${exp_dir}
 
 It will generate a file `epoch-28-avg-15.pt` in the given `exp_dir`.
-You can later load it by `torch.load("epoch-28-avg-15.pt")`.
+You can later load it by `torch.load("epoch-28-avg-15.pt", weights_only=False)`.
 """
 
 
@@ -109,7 +109,7 @@ def main():
 
     print("About to create model")
     filename = f"{params.exp_dir}/epoch-{params.epoch}.pt"
-    checkpoint = torch.load(filename, map_location=device)
+    checkpoint = torch.load(filename, map_location=device, weights_only=False)
     args = AttributeDict(checkpoint)
     model = get_model(args)
 
diff --git a/egs/wenetspeech4tts/TTS/f5-tts/infer.py b/egs/wenetspeech4tts/TTS/f5-tts/infer.py
index 6964a43be..52f57b187 100644
--- a/egs/wenetspeech4tts/TTS/f5-tts/infer.py
+++ b/egs/wenetspeech4tts/TTS/f5-tts/infer.py
@@ -750,7 +750,7 @@ def main():
     vocoder = vocoder.eval().to(device)
 
     model = get_model(args).eval().to(device)
-    checkpoint = torch.load(args.model_path, map_location="cpu")
+    checkpoint = torch.load(args.model_path, map_location="cpu", weights_only=False)
     if "ema_model_state_dict" in checkpoint or "model_state_dict" in checkpoint:
         model = load_F5_TTS_pretrained_checkpoint(model, args.model_path)
     else:
diff --git a/egs/wenetspeech4tts/TTS/f5-tts/train.py b/egs/wenetspeech4tts/TTS/f5-tts/train.py
index 5333b3f27..0cc0bf240 100755
--- a/egs/wenetspeech4tts/TTS/f5-tts/train.py
+++ b/egs/wenetspeech4tts/TTS/f5-tts/train.py
@@ -987,7 +987,7 @@ def run(rank, world_size, args):
     model = get_model(params)
 
     if params.pretrained_model_path:
-        checkpoint = torch.load(params.pretrained_model_path, map_location="cpu")
+        checkpoint = torch.load(params.pretrained_model_path, map_location="cpu", weights_only=False)
         if "ema_model_state_dict" in checkpoint or "model_state_dict" in checkpoint:
             model = load_F5_TTS_pretrained_checkpoint(
                 model, params.pretrained_model_path
diff --git a/egs/wenetspeech4tts/TTS/valle/infer.py b/egs/wenetspeech4tts/TTS/valle/infer.py
index d98abb731..1f8f285f8 100644
--- a/egs/wenetspeech4tts/TTS/valle/infer.py
+++ b/egs/wenetspeech4tts/TTS/valle/infer.py
@@ -132,7 +132,7 @@ def load_model(checkpoint, device):
     if not checkpoint:
         return None
 
-    checkpoint = torch.load(checkpoint, map_location=device)
+    checkpoint = torch.load(checkpoint, map_location=device, weights_only=False)
 
     params = AttributeDict(checkpoint)
     model = VALLE(
diff --git a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/decode.py b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/decode.py
index b77f734e3..7e8b50fbe 100755
--- a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/decode.py
+++ b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/decode.py
@@ -915,7 +915,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py
index 2c106c4cb..577ee90f4 100755
--- a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py
+++ b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py
@@ -236,7 +236,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/decode.py b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/decode.py
index e334e690a..375d339ca 100755
--- a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/decode.py
+++ b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/decode.py
@@ -786,7 +786,7 @@ def main():
             lg_filename = params.lang_dir / "LG.pt"
             logging.info(f"Loading {lg_filename}")
             decoding_graph = k2.Fsa.from_dict(
-                torch.load(lg_filename, map_location=device)
+                torch.load(lg_filename, map_location=device, weights_only=False)
             )
             decoding_graph.scores *= params.ngram_lm_scale
         else:
diff --git a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py
index 6995ff2ff..a3ce5a6c4 100755
--- a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py
+++ b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py
@@ -247,7 +247,7 @@ def main():
     num_param = sum([p.numel() for p in model.parameters()])
     logging.info(f"Number of model parameters: {num_param}")
 
-    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
     model.load_state_dict(checkpoint["model"], strict=False)
     model.to(device)
     model.eval()
diff --git a/egs/yesno/ASR/local/prepare_lang.py b/egs/yesno/ASR/local/prepare_lang.py
index f7fde7796..29202eeaf 100755
--- a/egs/yesno/ASR/local/prepare_lang.py
+++ b/egs/yesno/ASR/local/prepare_lang.py
@@ -14,7 +14,7 @@ consisting of words and tokens (i.e., phones) and does the following:
 
 4. Generate L.pt, in k2 format. It can be loaded by
 
-        d = torch.load("L.pt")
+        d = torch.load("L.pt", weights_only=False)
         lexicon = k2.Fsa.from_dict(d)
 
 5. Generate L_disambig.pt, in k2 format.
diff --git a/icefall/ali.py b/icefall/ali.py
index c3e4b2662..63bf79d57 100644
--- a/icefall/ali.py
+++ b/icefall/ali.py
@@ -59,7 +59,7 @@ def load_alignments(filename: str) -> Tuple[int, Dict[str, List[int]]]:
         - alignments: A dict containing utterances and their corresponding
           framewise alignment, after subsampling.
     """
-    ali_dict = torch.load(filename)
+    ali_dict = torch.load(filename, weights_only=False)
     subsampling_factor = ali_dict["subsampling_factor"]
     alignments = ali_dict["alignments"]
     return subsampling_factor, alignments
diff --git a/icefall/lexicon.py b/icefall/lexicon.py
index 22e1b78bb..6a157ffea 100644
--- a/icefall/lexicon.py
+++ b/icefall/lexicon.py
@@ -166,10 +166,10 @@ class Lexicon(object):
 
         if (lang_dir / "Linv.pt").exists():
             logging.info(f"Loading pre-compiled {lang_dir}/Linv.pt")
-            L_inv = k2.Fsa.from_dict(torch.load(lang_dir / "Linv.pt"))
+            L_inv = k2.Fsa.from_dict(torch.load(lang_dir / "Linv.pt", weights_only=False))
         else:
             logging.info("Converting L.pt to Linv.pt")
-            L = k2.Fsa.from_dict(torch.load(lang_dir / "L.pt"))
+            L = k2.Fsa.from_dict(torch.load(lang_dir / "L.pt", weights_only=False))
             L_inv = k2.arc_sort(L.invert())
             torch.save(L_inv.as_dict(), lang_dir / "Linv.pt")
 
diff --git a/icefall/rnn_lm/dataset.py b/icefall/rnn_lm/dataset.py
index 53be53f64..f6faf8d71 100644
--- a/icefall/rnn_lm/dataset.py
+++ b/icefall/rnn_lm/dataset.py
@@ -180,7 +180,7 @@ def get_dataloader(
     Returns:
       Return a dataloader containing the LM data.
     """
-    lm_data = torch.load(filename)
+    lm_data = torch.load(filename, weights_only=False)
 
     words = lm_data["words"]
     sentences = lm_data["sentences"]
diff --git a/icefall/shared/convert-k2-to-openfst.py b/icefall/shared/convert-k2-to-openfst.py
index 29a2cd7f7..6aae7cea8 100755
--- a/icefall/shared/convert-k2-to-openfst.py
+++ b/icefall/shared/convert-k2-to-openfst.py
@@ -80,7 +80,7 @@ def main():
 
     assert Path(input_filename).is_file(), f"{input_filename} does not exist"
     logging.info(f"Loading {input_filename}")
-    k2_fst = k2.Fsa.from_dict(torch.load(input_filename))
+    k2_fst = k2.Fsa.from_dict(torch.load(input_filename, weights_only=False))
     if olabels:
         assert hasattr(k2_fst, olabels), f"No such attribute: {olabels}"
 
diff --git a/icefall/utils.py b/icefall/utils.py
index 022f83b3b..427755090 100644
--- a/icefall/utils.py
+++ b/icefall/utils.py
@@ -549,7 +549,7 @@ def load_alignments(filename: str) -> Tuple[int, Dict[str, List[int]]]:
         - alignments: A dict containing utterances and their corresponding
           framewise alignment, after subsampling.
     """
-    ali_dict = torch.load(filename)
+    ali_dict = torch.load(filename, weights_only=False)
     subsampling_factor = ali_dict["subsampling_factor"]
     alignments = ali_dict["alignments"]
     return subsampling_factor, alignments