From 408fb0f1e2a8a8e3ac2a5d3648a998efb4dffd61 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Mon, 4 Nov 2024 11:55:41 +0800 Subject: [PATCH] reuse monotonic_align lib --- .../TTS/matcha/monotonic_align/.gitignore | 3 -- .../TTS/matcha/monotonic_align/__init__.py | 23 --------- .../TTS/matcha/monotonic_align/core.pyx | 49 ------------------- .../TTS/matcha/monotonic_align/setup.py | 12 ----- 4 files changed, 87 deletions(-) delete mode 100644 egs/ljspeech/TTS/matcha/monotonic_align/.gitignore delete mode 100644 egs/ljspeech/TTS/matcha/monotonic_align/__init__.py delete mode 100644 egs/ljspeech/TTS/matcha/monotonic_align/core.pyx delete mode 100644 egs/ljspeech/TTS/matcha/monotonic_align/setup.py diff --git a/egs/ljspeech/TTS/matcha/monotonic_align/.gitignore b/egs/ljspeech/TTS/matcha/monotonic_align/.gitignore deleted file mode 100644 index 28bdad6b8..000000000 --- a/egs/ljspeech/TTS/matcha/monotonic_align/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -build -core.c -*.so diff --git a/egs/ljspeech/TTS/matcha/monotonic_align/__init__.py b/egs/ljspeech/TTS/matcha/monotonic_align/__init__.py deleted file mode 100644 index 5b26fe474..000000000 --- a/egs/ljspeech/TTS/matcha/monotonic_align/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copied from -# https://github.com/shivammehta25/Matcha-TTS/blob/main/matcha/utils/monotonic_align/__init__.py -import numpy as np -import torch -from matcha.monotonic_align.core import maximum_path_c - - -def maximum_path(value, mask): - """Cython optimised version. - value: [b, t_x, t_y] - mask: [b, t_x, t_y] - """ - value = value * mask - device = value.device - dtype = value.dtype - value = value.data.cpu().numpy().astype(np.float32) - path = np.zeros_like(value).astype(np.int32) - mask = mask.data.cpu().numpy() - - t_x_max = mask.sum(1)[:, 0].astype(np.int32) - t_y_max = mask.sum(2)[:, 0].astype(np.int32) - maximum_path_c(path, value, t_x_max, t_y_max) - return torch.from_numpy(path).to(device=device, dtype=dtype) diff --git a/egs/ljspeech/TTS/matcha/monotonic_align/core.pyx b/egs/ljspeech/TTS/matcha/monotonic_align/core.pyx deleted file mode 100644 index eabc7f273..000000000 --- a/egs/ljspeech/TTS/matcha/monotonic_align/core.pyx +++ /dev/null @@ -1,49 +0,0 @@ -# Copied from -# https://github.com/shivammehta25/Matcha-TTS/blob/main/matcha/utils/monotonic_align/core.pyx -import numpy as np - -cimport cython -cimport numpy as np - -from cython.parallel import prange - - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil: - cdef int x - cdef int y - cdef float v_prev - cdef float v_cur - cdef float tmp - cdef int index = t_x - 1 - - for y in range(t_y): - for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): - if x == y: - v_cur = max_neg_val - else: - v_cur = value[x, y-1] - if x == 0: - if y == 0: - v_prev = 0. - else: - v_prev = max_neg_val - else: - v_prev = value[x-1, y-1] - value[x, y] = max(v_cur, v_prev) + value[x, y] - - for y in range(t_y - 1, -1, -1): - path[index, y] = 1 - if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]): - index = index - 1 - - -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil: - cdef int b = values.shape[0] - - cdef int i - for i in prange(b, nogil=True): - maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val) diff --git a/egs/ljspeech/TTS/matcha/monotonic_align/setup.py b/egs/ljspeech/TTS/matcha/monotonic_align/setup.py deleted file mode 100644 index df26c633e..000000000 --- a/egs/ljspeech/TTS/matcha/monotonic_align/setup.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copied from -# https://github.com/shivammehta25/Matcha-TTS/blob/main/matcha/utils/monotonic_align/setup.py -from distutils.core import setup - -import numpy -from Cython.Build import cythonize - -setup( - name="monotonic_align", - ext_modules=cythonize("core.pyx"), - include_dirs=[numpy.get_include()], -)