delete comments

yaozengwei 2022-06-18 20:07:59 +08:00
parent d7f4920206
commit b2a4c33ddd
4 changed files with 2 additions and 14 deletions


@@ -1090,9 +1090,6 @@ def scan_pessimistic_batches_for_oom(
     for criterion, cuts in batches.items():
         batch = train_dl.dataset[cuts]
         try:
-            # warmup = 0.0 is so that the derivs for the pruned loss stay zero
-            # (i.e. are not remembered by the decaying-average in adam), because
-            # we want to avoid these params being subject to shrinkage in adam.
             with torch.cuda.amp.autocast(enabled=params.use_fp16):
                 loss, _ = compute_loss(
                     params=params,

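This hunk, repeated in the other changed files below, sits inside `scan_pessimistic_batches_for_oom`, which runs the most memory-hungry batches through the model once under `torch.cuda.amp.autocast` so that a CUDA out-of-memory error surfaces before real training starts. Below is a minimal, self-contained sketch of that pattern; `compute_loss_fn`, `use_fp16`, and the keyword arguments are placeholders for illustration, not the recipe's actual signatures.

```python
import torch


def scan_for_oom_sketch(model, pessimistic_batches, compute_loss_fn, use_fp16):
    """Run the largest batches once so CUDA OOM surfaces before training.

    ``pessimistic_batches`` maps a selection criterion (e.g. "max duration")
    to one batch; ``compute_loss_fn`` stands in for the recipe's compute_loss.
    """
    for criterion, batch in pessimistic_batches.items():
        model.zero_grad()
        try:
            # Use mixed precision only if the real training run would.
            with torch.cuda.amp.autocast(enabled=use_fp16):
                loss = compute_loss_fn(model=model, batch=batch)
            loss.backward()
        except RuntimeError as e:
            if "CUDA out of memory" in str(e):
                print(
                    f"OOM on the batch selected by criterion {criterion!r}; "
                    "consider lowering --max-duration or shrinking the model."
                )
            raise
```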

@@ -1004,9 +1004,6 @@ def scan_pessimistic_batches_for_oom(
     for criterion, cuts in batches.items():
         batch = train_dl.dataset[cuts]
         try:
-            # warmup = 0.0 is so that the derivs for the pruned loss stay zero
-            # (i.e. are not remembered by the decaying-average in adam), because
-            # we want to avoid these params being subject to shrinkage in adam.
             with torch.cuda.amp.autocast(enabled=params.use_fp16):
                 loss, _ = compute_loss(
                     params=params,


@@ -1084,9 +1084,6 @@ def scan_pessimistic_batches_for_oom(
     for criterion, cuts in batches.items():
         batch = train_dl.dataset[cuts]
         try:
-            # warmup = 0.0 is so that the derivs for the pruned loss stay zero
-            # (i.e. are not remembered by the decaying-average in adam), because
-            # we want to avoid these params being subject to shrinkage in adam.
             with torch.cuda.amp.autocast(enabled=params.use_fp16):
                 loss, _ = compute_loss(
                     params=params,


@@ -74,9 +74,9 @@ from conformer import Conformer
 from decoder import Decoder
 from joiner import Joiner
 from lhotse.cut import Cut, MonoCut
+from lhotse.dataset.collation import collate_custom_field
 from lhotse.dataset.sampling.base import CutSampler
 from lhotse.utils import fix_random_seed
-from lhotse.dataset.collation import collate_custom_field
 from model import Transducer
 from optim import Eden, Eve
 from torch import Tensor
@@ -376,7 +376,7 @@ def get_params() -> AttributeDict:
             "distillation_layer": 5,  # 0-based index
             # Since output rate of hubert is 50, while that of encoder is 8,
             # two successive codebook_index are concatenated together.
-            # Detailed in function Transducer::concat_sucessive_codebook_indexes.
+            # Detailed in function Transducer::concat_sucessive_codebook_indexes
             "num_codebooks": 16,  # used to construct distillation loss
         }
     )
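
The comment kept in this hunk explains the frame-rate mismatch between the HuBERT teacher and the encoder: every two consecutive frames of codebook indexes are merged so the distillation targets line up with the encoder output. The snippet below is only an illustration of that idea, not the actual `Transducer::concat_sucessive_codebook_indexes` implementation; the `(N, T, num_codebooks)` layout and the truncation of a trailing odd frame are assumptions.

```python
import torch


def concat_successive_codebook_indexes_sketch(cb_indexes: torch.Tensor) -> torch.Tensor:
    """Pair every two consecutive frames of codebook indexes.

    cb_indexes: (N, T, C) at the teacher's frame rate (assumed layout).
    Returns:    (N, T // 2, 2 * C), i.e. half the frame rate, twice the codebooks.
    """
    N, T, C = cb_indexes.shape
    T = T - (T % 2)  # assumption: drop a trailing odd frame if present
    return cb_indexes[:, :T, :].reshape(N, T // 2, 2 * C)
```

The real implementation may also need to handle padding values and length alignment; check the recipe's model code for the authoritative version.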
@@ -1060,9 +1060,6 @@ def scan_pessimistic_batches_for_oom(
     for criterion, cuts in batches.items():
         batch = train_dl.dataset[cuts]
         try:
-            # warmup = 0.0 is so that the derivs for the pruned loss stay zero
-            # (i.e. are not remembered by the decaying-average in adam), because
-            # we want to avoid these params being subject to shrinkage in adam.
             with torch.cuda.amp.autocast(enabled=params.use_fp16):
                 loss, _ = compute_loss(
                     params=params,