Commit 241e77c

Add files via upload
1 parent f340094 commit 241e77c

File tree

11 files changed: +943 -0 lines

DEX-TTS/bigvgan/__init__.py

+11
@@ -0,0 +1,11 @@
# Adapted from https://github.com/jik876/hifi-gan under the MIT license.
# LICENSE is in incl_licenses directory.

import os
import shutil
from .models import BigVGAN as Generator


class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self
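
For context, a minimal sketch of how this AttrDict is typically used in hifi-gan-derived codebases: hyperparameters are loaded from a JSON config and then read with attribute access. The config path and the sampling_rate key below are hypothetical illustrations, not part of this commit.

import json

# Hypothetical config path and key, shown only to illustrate AttrDict:
with open('configs/bigvgan_config.json') as f:
    h = AttrDict(json.load(f))

h.sampling_rate      # attribute access...
h['sampling_rate']   # ...and dict access read the same value
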

DEX-TTS/bigvgan/activations.py

+120
@@ -0,0 +1,120 @@
# Implementation adapted from https://github.com/EdwardDixon/snake under the MIT license.
# LICENSE is in incl_licenses directory.

import torch
from torch import nn, sin, pow
from torch.nn import Parameter


class Snake(nn.Module):
    '''
    Implementation of a sine-based periodic activation function
    Shape:
        - Input: (B, C, T)
        - Output: (B, C, T), same shape as the input
    Parameters:
        - alpha - trainable parameter
    References:
        - This activation function is from this paper by Liu Ziyin, Tilman Hartwig, Masahito Ueda:
          https://arxiv.org/abs/2006.08195
    Examples:
        >>> a1 = Snake(256)
        >>> x = torch.randn(256)
        >>> x = a1(x)
    '''
    def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False):
        '''
        Initialization.
        INPUT:
            - in_features: shape of the input
            - alpha: trainable parameter
            alpha is initialized to 1 by default; higher values give a higher frequency.
            alpha will be trained along with the rest of your model.
        '''
        super(Snake, self).__init__()
        self.in_features = in_features

        # initialize alpha
        self.alpha_logscale = alpha_logscale
        if self.alpha_logscale:  # log-scale alphas initialized to zeros
            self.alpha = Parameter(torch.zeros(in_features) * alpha)
        else:  # linear-scale alphas initialized to ones
            self.alpha = Parameter(torch.ones(in_features) * alpha)

        self.alpha.requires_grad = alpha_trainable

        self.no_div_by_zero = 0.000000001

    def forward(self, x):
        '''
        Forward pass of the function.
        Applies the function to the input elementwise.
        Snake := x + 1/a * sin^2(ax)
        '''
        alpha = self.alpha.unsqueeze(0).unsqueeze(-1)  # line up with x to [B, C, T]
        if self.alpha_logscale:
            alpha = torch.exp(alpha)
        x = x + (1.0 / (alpha + self.no_div_by_zero)) * pow(sin(x * alpha), 2)

        return x


class SnakeBeta(nn.Module):
    '''
    A modified Snake function which uses separate parameters for the magnitude of the periodic components
    Shape:
        - Input: (B, C, T)
        - Output: (B, C, T), same shape as the input
    Parameters:
        - alpha - trainable parameter that controls frequency
        - beta - trainable parameter that controls magnitude
    References:
        - This activation function is a modified version based on this paper by Liu Ziyin, Tilman Hartwig, Masahito Ueda:
          https://arxiv.org/abs/2006.08195
    Examples:
        >>> a1 = SnakeBeta(256)
        >>> x = torch.randn(256)
        >>> x = a1(x)
    '''
    def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha_logscale=False):
        '''
        Initialization.
        INPUT:
            - in_features: shape of the input
            - alpha - trainable parameter that controls frequency
            - beta - trainable parameter that controls magnitude
            alpha is initialized to 1 by default; higher values give a higher frequency.
            beta is initialized to 1 by default; higher values give a larger magnitude.
            alpha and beta will be trained along with the rest of your model.
        '''
        super(SnakeBeta, self).__init__()
        self.in_features = in_features

        # initialize alpha and beta
        self.alpha_logscale = alpha_logscale
        if self.alpha_logscale:  # log-scale alphas initialized to zeros
            self.alpha = Parameter(torch.zeros(in_features) * alpha)
            self.beta = Parameter(torch.zeros(in_features) * alpha)
        else:  # linear-scale alphas initialized to ones
            self.alpha = Parameter(torch.ones(in_features) * alpha)
            self.beta = Parameter(torch.ones(in_features) * alpha)

        self.alpha.requires_grad = alpha_trainable
        self.beta.requires_grad = alpha_trainable

        self.no_div_by_zero = 0.000000001

    def forward(self, x):
        '''
        Forward pass of the function.
        Applies the function to the input elementwise.
        SnakeBeta := x + 1/b * sin^2(ax)
        '''
        alpha = self.alpha.unsqueeze(0).unsqueeze(-1)  # line up with x to [B, C, T]
        beta = self.beta.unsqueeze(0).unsqueeze(-1)
        if self.alpha_logscale:
            alpha = torch.exp(alpha)
            beta = torch.exp(beta)
        x = x + (1.0 / (beta + self.no_div_by_zero)) * pow(sin(x * alpha), 2)

        return x
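
As a quick sanity check of the two activations above, a minimal usage sketch (assuming the classes are importable as defined in this file); both are elementwise and preserve the (B, C, T) shape:

import torch

snake = Snake(in_features=256, alpha_logscale=True)
snake_beta = SnakeBeta(in_features=256)

x = torch.randn(4, 256, 100)           # (B, C, T)
assert snake(x).shape == x.shape       # elementwise, shape-preserving
assert snake_beta(x).shape == x.shape
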
DEX-TTS/bigvgan/alias_free_torch/__init__.py (path inferred from the package imports)

+6
@@ -0,0 +1,6 @@
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
# LICENSE is in incl_licenses directory.

from .filter import *
from .resample import *
from .act import *
DEX-TTS/bigvgan/alias_free_torch/act.py (path inferred from the package imports)

+28
@@ -0,0 +1,28 @@
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
# LICENSE is in incl_licenses directory.

import torch.nn as nn
from .resample import UpSample1d, DownSample1d


class Activation1d(nn.Module):
    def __init__(self,
                 activation,
                 up_ratio: int = 2,
                 down_ratio: int = 2,
                 up_kernel_size: int = 12,
                 down_kernel_size: int = 12):
        super().__init__()
        self.up_ratio = up_ratio
        self.down_ratio = down_ratio
        self.act = activation
        self.upsample = UpSample1d(up_ratio, up_kernel_size)
        self.downsample = DownSample1d(down_ratio, down_kernel_size)

    # x: [B, C, T]
    def forward(self, x):
        x = self.upsample(x)
        x = self.act(x)
        x = self.downsample(x)

        return x
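
Activation1d applies the nonlinearity at twice the sampling rate and then low-pass filters back down, which suppresses the aliasing a pointwise nonlinearity would otherwise introduce. A minimal sketch, assuming Snake from activations.py in this commit; with the default 2x ratios the module is length-preserving:

import torch

act = Activation1d(activation=Snake(64))   # Snake as defined in activations.py
x = torch.randn(2, 64, 256)                # (B, C, T)
y = act(x)                                 # upsample 2x -> Snake -> downsample 2x
assert y.shape == x.shape                  # T is preserved with the default ratios
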
DEX-TTS/bigvgan/alias_free_torch/filter.py (path inferred from the package imports)

+95
@@ -0,0 +1,95 @@
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
# LICENSE is in incl_licenses directory.

import torch
import torch.nn as nn
import torch.nn.functional as F
import math

if 'sinc' in dir(torch):
    sinc = torch.sinc
else:
    # This code is adopted from adefossez's julius.core.sinc under the MIT License
    # https://adefossez.github.io/julius/julius/core.html
    # LICENSE is in incl_licenses directory.
    def sinc(x: torch.Tensor):
        """
        Implementation of sinc, i.e. sin(pi * x) / (pi * x)
        __Warning__: Unlike julius.sinc, the input is multiplied by `pi`!
        """
        return torch.where(x == 0,
                           torch.tensor(1., device=x.device, dtype=x.dtype),
                           torch.sin(math.pi * x) / math.pi / x)


# This code is adopted from adefossez's julius.lowpass.LowPassFilters under the MIT License
# https://adefossez.github.io/julius/julius/lowpass.html
# LICENSE is in incl_licenses directory.
def kaiser_sinc_filter1d(cutoff, half_width, kernel_size):  # return filter [1, 1, kernel_size]
    even = (kernel_size % 2 == 0)
    half_size = kernel_size // 2

    # For the Kaiser window
    delta_f = 4 * half_width
    A = 2.285 * (half_size - 1) * math.pi * delta_f + 7.95
    if A > 50.:
        beta = 0.1102 * (A - 8.7)
    elif A >= 21.:
        beta = 0.5842 * (A - 21)**0.4 + 0.07886 * (A - 21.)
    else:
        beta = 0.
    window = torch.kaiser_window(kernel_size, beta=beta, periodic=False)

    # ratio = 0.5 / cutoff -> 2 * cutoff = 1 / ratio
    if even:
        time = (torch.arange(-half_size, half_size) + 0.5)
    else:
        time = torch.arange(kernel_size) - half_size
    if cutoff == 0:
        filter_ = torch.zeros_like(time)
    else:
        filter_ = 2 * cutoff * window * sinc(2 * cutoff * time)
        # Normalize the filter to have sum = 1; otherwise a small amount of
        # the constant component of the input signal leaks through.
        filter_ /= filter_.sum()
    filter = filter_.view(1, 1, kernel_size)

    return filter


class LowPassFilter1d(nn.Module):
    def __init__(self,
                 cutoff=0.5,
                 half_width=0.6,
                 stride: int = 1,
                 padding: bool = True,
                 padding_mode: str = 'replicate',
                 kernel_size: int = 12):
        # kernel_size should be an even number for the StyleGAN3 setup;
        # in this implementation an odd number is also possible.
        super().__init__()
        if cutoff < -0.:
            raise ValueError("Minimum cutoff must be larger than zero.")
        if cutoff > 0.5:
            raise ValueError("A cutoff above 0.5 does not make sense.")
        self.kernel_size = kernel_size
        self.even = (kernel_size % 2 == 0)
        self.pad_left = kernel_size // 2 - int(self.even)
        self.pad_right = kernel_size // 2
        self.stride = stride
        self.padding = padding
        self.padding_mode = padding_mode
        filter = kaiser_sinc_filter1d(cutoff, half_width, kernel_size)
        self.register_buffer("filter", filter)

    # input [B, C, T]
    def forward(self, x):
        _, C, _ = x.shape

        if self.padding:
            x = F.pad(x, (self.pad_left, self.pad_right),
                      mode=self.padding_mode)
        out = F.conv1d(x, self.filter.expand(C, -1, -1),
                       stride=self.stride, groups=C)

        return out
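
A small sketch of the two pieces above, using only names defined in this file (assuming the module is importable): the Kaiser-windowed sinc kernel is normalized to unit sum, and with padding=True and stride=1 the low-pass module preserves the time dimension:

import torch

kernel = kaiser_sinc_filter1d(cutoff=0.25, half_width=0.3, kernel_size=12)
assert kernel.shape == (1, 1, 12)
assert torch.isclose(kernel.sum(), torch.tensor(1.0))   # unit DC gain

lowpass = LowPassFilter1d(cutoff=0.25, half_width=0.3, stride=1, kernel_size=12)
x = torch.randn(1, 8, 100)         # (B, C, T)
assert lowpass(x).shape == x.shape
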
DEX-TTS/bigvgan/alias_free_torch/resample.py (path inferred from the package imports)

+49
@@ -0,0 +1,49 @@
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
# LICENSE is in incl_licenses directory.

import torch.nn as nn
from torch.nn import functional as F
from .filter import LowPassFilter1d
from .filter import kaiser_sinc_filter1d


class UpSample1d(nn.Module):
    def __init__(self, ratio=2, kernel_size=None):
        super().__init__()
        self.ratio = ratio
        self.kernel_size = int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size
        self.stride = ratio
        self.pad = self.kernel_size // ratio - 1
        self.pad_left = self.pad * self.stride + (self.kernel_size - self.stride) // 2
        self.pad_right = self.pad * self.stride + (self.kernel_size - self.stride + 1) // 2
        filter = kaiser_sinc_filter1d(cutoff=0.5 / ratio,
                                      half_width=0.6 / ratio,
                                      kernel_size=self.kernel_size)
        self.register_buffer("filter", filter)

    # x: [B, C, T]
    def forward(self, x):
        _, C, _ = x.shape

        x = F.pad(x, (self.pad, self.pad), mode='replicate')
        x = self.ratio * F.conv_transpose1d(
            x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C)
        x = x[..., self.pad_left:-self.pad_right]

        return x


class DownSample1d(nn.Module):
    def __init__(self, ratio=2, kernel_size=None):
        super().__init__()
        self.ratio = ratio
        self.kernel_size = int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size
        self.lowpass = LowPassFilter1d(cutoff=0.5 / ratio,
                                       half_width=0.6 / ratio,
                                       stride=ratio,
                                       kernel_size=self.kernel_size)

    def forward(self, x):
        xx = self.lowpass(x)

        return xx
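
To close the loop, a short sketch of the resamplers, again using only names defined in this file: UpSample1d returns exactly ratio * T samples thanks to the pad_left/pad_right trimming, and DownSample1d divides T by the ratio, so a 2x round trip restores the original length:

import torch

up = UpSample1d(ratio=2)       # default kernel_size is 12 for ratio 2
down = DownSample1d(ratio=2)

x = torch.randn(1, 4, 128)     # (B, C, T)
x_up = up(x)
assert x_up.shape[-1] == 256   # exactly 2 * T
assert down(x_up).shape[-1] == 128
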
