Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-09-19 05:54:20 +00:00)

Commit f11cfd4ebe ("from local"), parent 7a4cc6f9b6
New binary file: egs/tedlium3/ASR/.prepare.sh.swp (contents not shown)
@@ -366,3 +366,8 @@ class TedLiumAsrDataModule:
     def test_cuts(self) -> CutSet:
         logging.info("About to get test cuts")
         return load_manifest_lazy(self.args.manifest_dir / "tedlium_cuts_test.jsonl.gz")
+
+    @lru_cache()
+    def user_test_cuts(self, user) -> CutSet:
+        logging.info("About to get test cuts")
+        return load_manifest_lazy(self.args.manifest_dir / f"tedlium_cuts_test_{user}.jsonl.gz")
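For context, a method like user_test_cuts would typically be consumed from a decode script. The sketch below is a hypothetical illustration, not part of this commit: it assumes the per-user manifests tedlium_cuts_test_<user>.jsonl.gz have already been prepared and that the data module exposes the usual icefall test_dataloaders helper.

# Hypothetical usage sketch (not from this commit).
from asr_datamodule import TedLiumAsrDataModule  # recipe-local module

def decode_per_user(args, users):
    tedlium = TedLiumAsrDataModule(args)
    for user in users:
        cuts = tedlium.user_test_cuts(user)   # memoized per user by @lru_cache
        dl = tedlium.test_dataloaders(cuts)   # helper usually provided by icefall data modules
        # ... run decoding over `dl` and report per-user WER ...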
@@ -160,6 +160,20 @@ def add_pea_arguments(parser: argparse.ArgumentParser):
         default=False,
         help="Low Rank Adaptation training for PEA"
     )
 
+    parser.add_argument(
+        "--rank",
+        type=int,
+        default=2,
+        help="Rank for Low Rank Adaptation. [default = 2]"
+    )
+
+    parser.add_argument(
+        "--lora-alpha",
+        type=float,
+        default=10000.,
+        help="alpha for Low Rank Adaptation. alpha will be multiplied to lora output. [default = 10000.]"
+    )
+
     parser.add_argument(
         "--pea-lr",
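To make the behaviour of the two new options concrete, here is a small, self-contained parse example. It is a hypothetical sketch: in the real script the flags are registered by add_pea_arguments alongside the pre-existing PEA/LoRA options rather than declared standalone.

# Hypothetical sketch (not from this commit): the new flags in isolation.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--rank", type=int, default=2)
parser.add_argument("--lora-alpha", type=float, default=10000.)

args = parser.parse_args(["--rank", "4", "--lora-alpha", "16"])
print(args.rank, args.lora_alpha)  # -> 4 16.0

Note that argparse maps --lora-alpha to the attribute args.lora_alpha, which is how the training code reads it.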
@@ -1114,9 +1128,9 @@ def train_one_epoch(
             params.cur_batch_idx = batch_idx
             del params.cur_batch_idx
 
-            if rank == 0:
-                for i, lora in enumerate(lora_modules):
-                    lora.save_checkpoint(i, params.batch_idx_train, params.exp_dir)
+            #if rank == 0:
+            #    for i, lora in enumerate(lora_modules):
+            #        lora.save_checkpoint(i, params.batch_idx_train, params.exp_dir)
 
         if batch_idx % 100 == 0 and params.use_fp16:
             # If the grad scale was less than 1, try increasing it. The _growth_interval
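This hunk disables the per-step saving of the LoRA adapters. If the adapters were instead snapshotted elsewhere (say, once per epoch), a helper along the lines below would suffice. This is a hypothetical sketch; the real LoRAHook.save_checkpoint is defined elsewhere in the patch and may store things differently.

# Hypothetical sketch (not from this commit).
from pathlib import Path
import torch

def save_lora_adapters(lora_modules, batch_idx_train, exp_dir):
    for i, hook in enumerate(lora_modules):
        out = Path(exp_dir) / f"lora-{i}-iter-{batch_idx_train}.pt"
        # Persist only the small adapter weights, not the frozen base model.
        torch.save(hook.lora.state_dict(), out)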
@@ -1495,26 +1509,39 @@ def run_pea(rank, world_size, args, wb=None):
         logging.info("Using DDP")
         model = DDP(model, device_ids=[rank], find_unused_parameters=True)
 
-    lora_modules = []
-    for modules in model.modules():
-        if isinstance(modules, fairseq.modules.multihead_attention.MultiheadAttention):
-            for module in modules.modules():
-                if isinstance(module, torch.nn.Linear):
-                    lora_modules.append(LoRAHook(
-                        module,
-                        embedding_dim=args.encoder_dim,
-                        rank=args.rank,
-                        lora_alpha=args.lora_alpha,
-                    ))
-
-    if world_size > 1:
-        logging.info("Using DDP for LoRA")
-        for module in lora_modules:
-            module.lora = module.lora.to(device)
-            module.lora = DDP(module.lora, device_ids=[rank], find_unused_parameters=False)
-
-    pea_names = []
-    pea_param = []
+    pea_names = []
+    pea_param = []
+    lora_modules = None
+
+    if args.bitfit:
+        for n, p in model.named_parameters():
+            if 'encoder.encoder' in n and 'bias' in n and 'feature_extractor' not in n:
+                pea_names.append(n)
+                pea_param.append(p)
+            else:
+                p.requires_grad = False
+
+    elif args.lora:
+        lora_modules = []
+        for modules in model.modules():
+            if isinstance(modules, fairseq.modules.multihead_attention.MultiheadAttention):
+                for module in modules.modules():
+                    if isinstance(module, torch.nn.Linear):
+                        lora_modules.append(
+                            LoRAHook(
+                                module,
+                                embedding_dim=args.encoder_dim,
+                                rank=args.rank,
+                                lora_alpha=args.lora_alpha,
+                            )
+                        )
+
+        if world_size > 1:
+            logging.info("Using DDP for LoRA")
+            for module in lora_modules:
+                module.lora = module.lora.to(device)
+                module.lora = DDP(module.lora, device_ids=[rank], find_unused_parameters=False)
+
     for i, module in enumerate(lora_modules):
         for n, p in module.lora.named_parameters():
             new_n = str(i) + n
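LoRAHook itself is defined elsewhere in this patch. To show the general pattern the hunk relies on, attaching a trainable low-rank correction to each frozen nn.Linear inside the attention blocks via a forward hook and scaling it by lora_alpha, here is a hypothetical, self-contained sketch; the class and attribute names are assumptions, not the patch's actual implementation.

# Hypothetical sketch of a LoRA forward hook (not from this commit).
import torch
import torch.nn as nn

class LoRAAdapter(nn.Module):
    """Low-rank residual update: x -> alpha * up(down(x)), with `up` initialised to zero."""
    def __init__(self, embedding_dim, rank, lora_alpha):
        super().__init__()
        self.down = nn.Linear(embedding_dim, rank, bias=False)
        self.up = nn.Linear(rank, embedding_dim, bias=False)
        nn.init.zeros_(self.up.weight)  # adaptation starts as a no-op
        self.scale = lora_alpha

    def forward(self, x):
        return self.scale * self.up(self.down(x))

class LoRAHookSketch:
    """Add a trainable low-rank correction to the output of a (square) nn.Linear."""
    def __init__(self, module, embedding_dim, rank, lora_alpha):
        self.lora = LoRAAdapter(embedding_dim, rank, lora_alpha)
        module.register_forward_hook(self._hook)

    def _hook(self, module, inputs, output):
        # Returning a value from a forward hook replaces the module's output.
        return output + self.lora(inputs[0])

# Minimal usage: hook one projection and train only the adapter parameters.
proj = nn.Linear(16, 16)
hook = LoRAHookSketch(proj, embedding_dim=16, rank=2, lora_alpha=1.0)
y = proj(torch.randn(4, 16))  # now includes the (initially zero) LoRA term

Only the adapter parameters, which the hunk collects into pea_param via module.lora.named_parameters(), would then be handed to the optimizer, which keeps the adaptation parameter-efficient.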