Upload 3 files

Browse files

Files changed (3) hide show

radtts-pp-bgap-model/config.json +224 -0
radtts-pp-bgap-model/logs/events.out.tfevents.1683933666.desktop.2175284.0 +3 -0
radtts-pp-bgap-model/model_bgap_50000.pt +3 -0

radtts-pp-bgap-model/config.json ADDED Viewed

	@@ -0,0 +1,224 @@

+{
+    "train_config": {
+        "output_directory": "outdir_pp_bgap_model",
+        "epochs": 10000000,
+        "optim_algo": "RAdam",
+        "learning_rate": 0.0001,
+        "weight_decay": 1e-06,
+        "sigma": 1.0,
+        "iters_per_checkpoint": 1000,
+        "batch_size": 18,
+        "seed": null,
+        "checkpoint_path": "",
+        "ignore_layers": [],
+        "ignore_layers_warmstart": [],
+        "finetune_layers": [],
+        "include_layers": [],
+        "vocoder_config_path": "models/hifigan_22khz_config.json",
+        "vocoder_checkpoint_path": "models/hifigan_ljs_generator_v1.pt",
+        "log_attribute_samples": true,
+        "log_decoder_samples": true,
+        "warmstart_checkpoint_path": "models/model_radtss_pp_decoder_100000",
+        "use_amp": false,
+        "grad_clip_val": 1.0,
+        "loss_weights": {
+            "blank_logprob": -1,
+            "ctc_loss_weight": 0.1,
+            "binarization_loss_weight": 1.0,
+            "dur_loss_weight": 1.0,
+            "f0_loss_weight": 1.0,
+            "energy_loss_weight": 1.0,
+            "vpred_loss_weight": 1.0
+        },
+        "binarization_start_iter": 0,
+        "kl_loss_start_iter": 0,
+        "unfreeze_modules": "durf0energyvpred"
+    },
+    "data_config": {
+        "training_files": {
+            "LJS": {
+                "basedir": "filelists/",
+                "audiodir": "wavs",
+                "filelist": "3speakers_ukrainian_train_filelist.txt",
+                "lmdbpath": ""
+            }
+        },
+        "validation_files": {
+            "LJS": {
+                "basedir": "filelists/",
+                "audiodir": "wavs",
+                "filelist": "3speakers_ukrainian_val_filelist.txt",
+                "lmdbpath": ""
+            }
+        },
+        "dur_min": 0.1,
+        "dur_max": 10.2,
+        "sampling_rate": 22050,
+        "filter_length": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "n_mel_channels": 80,
+        "mel_fmin": 0.0,
+        "mel_fmax": 8000.0,
+        "f0_min": 80.0,
+        "f0_max": 640.0,
+        "max_wav_value": 32768.0,
+        "use_f0": true,
+        "use_log_f0": 0,
+        "use_energy_avg": true,
+        "use_scaled_energy": true,
+        "symbol_set": "ukrainian",
+        "cleaner_names": [
+            "ukrainian_cleaners"
+        ],
+        "heteronyms_path": "tts_text_processing/heteronyms",
+        "phoneme_dict_path": "tts_text_processing/cmudict-0.7b",
+        "p_phoneme": 0.0,
+        "handle_phoneme": "word",
+        "handle_phoneme_ambiguous": "ignore",
+        "include_speakers": null,
+        "n_frames": -1,
+        "betabinom_cache_path": "/home/yehor/RADTTS-Multiple-Voices/radtts/data_cache",
+        "lmdb_cache_path": "",
+        "use_attn_prior_masking": true,
+        "prepend_space_to_text": true,
+        "append_space_to_text": true,
+        "add_bos_eos_to_text": false,
+        "betabinom_scaling_factor": 1.0,
+        "distance_tx_unvoiced": true,
+        "mel_noise_scale": 0.0
+    },
+    "dist_config": {
+        "dist_backend": "nccl",
+        "dist_url": "tcp://localhost:54321"
+    },
+    "model_config": {
+        "n_speakers": 3,
+        "n_speaker_dim": 16,
+        "n_text": 185,
+        "n_text_dim": 512,
+        "n_flows": 8,
+        "n_conv_layers_per_step": 4,
+        "n_mel_channels": 80,
+        "n_hidden": 1024,
+        "mel_encoder_n_hidden": 512,
+        "dummy_speaker_embedding": false,
+        "n_early_size": 2,
+        "n_early_every": 2,
+        "n_group_size": 2,
+        "affine_model": "wavenet",
+        "include_modules": "decatndpmvpredapm",
+        "scaling_fn": "tanh",
+        "matrix_decomposition": "LUS",
+        "learn_alignments": true,
+        "use_speaker_emb_for_alignment": false,
+        "attn_straight_through_estimator": true,
+        "use_context_lstm": true,
+        "context_lstm_norm": "spectral",
+        "context_lstm_w_f0_and_energy": true,
+        "text_encoder_lstm_norm": "spectral",
+        "n_f0_dims": 1,
+        "n_energy_avg_dims": 1,
+        "use_first_order_features": true,
+        "unvoiced_bias_activation": "relu",
+        "decoder_use_partial_padding": true,
+        "decoder_use_unvoiced_bias": true,
+        "ap_pred_log_f0": true,
+        "ap_use_unvoiced_bias": false,
+        "ap_use_voiced_embeddings": true,
+        "dur_model_config": {
+            "name": "dap",
+            "hparams": {
+                "n_speaker_dim": 16,
+                "bottleneck_hparams": {
+                    "in_dim": 512,
+                    "reduction_factor": 16,
+                    "norm": "weightnorm",
+                    "non_linearity": "relu"
+                },
+                "take_log_of_input": true,
+                "arch_hparams": {
+                    "out_dim": 1,
+                    "n_layers": 2,
+                    "n_channels": 256,
+                    "kernel_size": 3,
+                    "p_dropout": 0.25,
+                    "in_dim": 48
+                }
+            }
+        },
+        "f0_model_config": {
+            "name": "bgap",
+            "hparams": {
+                "n_in_dim": 2,
+                "take_log_of_input": false,
+                "n_speaker_dim": 16,
+                "n_flows": 6,
+                "n_group_size": 2,
+                "n_layers": 4,
+                "kernel_size": 5,
+                "scaling_fn": "tanh",
+                "with_dilation": true,
+                "bottleneck_hparams": {
+                    "in_dim": 512,
+                    "reduction_factor": 16,
+                    "norm": "weightnorm",
+                    "non_linearity": "leakyrelu",
+                    "use_partial_padding": true,
+                    "kernel_size": 1
+                },
+                "n_bins": 16,
+                "use_quadratic": true,
+                "n_spline_steps": 4
+            }
+        },
+        "energy_model_config": {
+            "name": "bgap",
+            "hparams": {
+                "n_in_dim": 2,
+                "take_log_of_input": false,
+                "n_speaker_dim": 16,
+                "n_flows": 6,
+                "n_group_size": 4,
+                "n_layers": 4,
+                "kernel_size": 5,
+                "scaling_fn": "tanh",
+                "with_dilation": true,
+                "bottleneck_hparams": {
+                    "in_dim": 512,
+                    "reduction_factor": 16,
+                    "norm": "weightnorm",
+                    "non_linearity": "leakyrelu",
+                    "use_partial_padding": true,
+                    "kernel_size": 1
+                },
+                "n_bins": 16,
+                "use_quadratic": true,
+                "n_spline_steps": 4
+            }
+        },
+        "v_model_config": {
+            "name": "dap",
+            "hparams": {
+                "n_speaker_dim": 16,
+                "take_log_of_input": false,
+                "bottleneck_hparams": {
+                    "in_dim": 512,
+                    "reduction_factor": 16,
+                    "norm": "weightnorm",
+                    "non_linearity": "relu"
+                },
+                "arch_hparams": {
+                    "out_dim": 1,
+                    "n_layers": 2,
+                    "n_channels": 256,
+                    "kernel_size": 3,
+                    "p_dropout": 0.5,
+                    "lstm_type": "",
+                    "use_linear": 1,
+                    "in_dim": 48
+                }
+            }
+        }
+    }
+}

radtts-pp-bgap-model/logs/events.out.tfevents.1683933666.desktop.2175284.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3bcca9dd1e8b06e07cd145b57f67315d1af1c366b6983c2a8037ae31c0a513c
+size 42834375

radtts-pp-bgap-model/model_bgap_50000.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2dc49ada6e8169fe62f4eae7b3a7f7809763be33835edf83abc92e6066fab5e
+size 1957493741