| generator: |
| name: SoundStream |
| config: |
| n_filters: 32 |
| D: 256 |
| target_bandwidths: |
| - 0.5 |
| - 1 |
| - 1.5 |
| - 2 |
| - 4 |
| ratios: |
| - 8 |
| - 5 |
| - 4 |
| - 2 |
| sample_rate: 16000 |
| bins: 1024 |
| semantic_techer: wavlm_base_plus |
| d_list: |
| - mfd |
| mfd: |
| name: MultiFrequencyDiscriminator |
| config: |
| hop_lengths: |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| hidden_channels: |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 512 |
| - 512 |
| domain: double |
| mel_scale: true |
| sample_rate: 16000 |
| mpd: |
| name: MultiPeriodDiscriminator |
| config: |
| period_sizes: |
| - 2 |
| - 3 |
| - 5 |
| - 7 |
| - 11 |
| period_kernel_size: 5 |
| msd: |
| name: MultiScaleDiscriminator |
| config: |
| num_scales: 3 |
| pool_kernel_size: 4 |
| pool_stride: 2 |
| optimizer: |
| g: |
| name: AdamW |
| config: |
| lr: 0.0002 |
| betas: |
| - 0.8 |
| - 0.99 |
| eps: 1.0e-06 |
| d: |
| name: AdamW |
| config: |
| lr: 0.0002 |
| betas: |
| - 0.8 |
| - 0.99 |
| eps: 1.0e-06 |
| lr_scheduler: |
| g: |
| name: ExponentialLR |
| config: |
| gamma: 0.999 |
| d: |
| name: ExponentialLR |
| config: |
| gamma: 0.999 |
| criterion: |
| g_criterion: |
| name: losses.generator_loss.GeneratorSTFTLoss |
| config: |
| use_mel_loss: false |
| adv_criterion: MSEGLoss |
| mel_loss_weight: 45 |
| use_feature_match: true |
| feat_match_loss_weight: 20 |
| use_full_stft_loss: true |
| use_sub_stft_loss: true |
| full_stft_loss_weight: 1 |
| sub_stft_loss_weight: 1 |
| mel_scale_loss: |
| sampling_rate: 16000 |
| n_fft: 1024 |
| num_mels: 80 |
| hop_size: 160 |
| win_size: 800 |
| fmin: 0 |
| full_multi_scale_stft_loss: |
| fft_sizes: |
| - 512 |
| - 1024 |
| - 2048 |
| win_sizes: |
| - 480 |
| - 960 |
| - 1200 |
| hop_sizes: |
| - 120 |
| - 240 |
| - 300 |
| sub_multi_scale_stft_loss: |
| num_bands: 6 |
| fft_sizes: |
| - 128 |
| - 256 |
| - 256 |
| win_sizes: |
| - 80 |
| - 120 |
| - 200 |
| hop_sizes: |
| - 20 |
| - 40 |
| - 50 |
| d_criterion: |
| name: losses.discriminator_loss.MSEDiscriminatorLoss |
| config: null |
| commit_loss_weight: 1.0 |
| codebook_loss_weight: 100 |
| audio_norm_scale: 0.95 |
| |