diff --git a/configs/config.json b/configs/config.json index d7e30de..650bbf5 100644 --- a/configs/config.json +++ b/configs/config.json @@ -22,7 +22,7 @@ "c_kl_r": 0.01, "use_sr": true, "max_speclen": 512, - "port": "8001", + "port": "64435", "keep_ckpts": 2, "all_in_mem": false, "vol_aug": false @@ -38,6 +38,8 @@ "n_mel_channels": 128, "mel_fmin": 0.0, "mel_fmax": null, + "min_file_length": 0.3, + "max_file_length": 10.0, "unit_interpolate_mode": "nearest" }, "model": { diff --git a/data_utils.py b/data_utils.py index 7f27c62..f5f778a 100644 --- a/data_utils.py +++ b/data_utils.py @@ -39,6 +39,8 @@ def __init__(self, audiopaths, hparams, all_in_mem: bool = False): self.num_mels = hparams.data.n_mel_channels self.mel_fmin = hparams.data.mel_fmin self.mel_fmax = hparams.data.mel_fmax + self.min_file_length = hparams.data.min_file_length * self.sampling_rate + self.max_file_length = hparams.data.max_file_length * self.sampling_rate # self.spk_map_inv = {v: k for k, v in self.spk_map.items()} random.seed(1234) @@ -52,9 +54,9 @@ def __init__(self, audiopaths, hparams, all_in_mem: bool = False): def _filter_long_files(self, audio_paths): filtered = [] - max_length = 22050 * 9.0 # 10 seconds + for p, speaker in audio_paths: - if (Path(p).stat().st_size // 2) < max_length: + if self.min_file_length <(Path(p).stat().st_size // 2) < self.max_file_length: filtered.append([p, speaker]) print("Audiopaths before filtering:", len(audio_paths)) @@ -125,11 +127,7 @@ def get_audio(self, filename): ) audio_norm = audio_norm[:, : lmin * self.hop_length] - if spec.shape[1] < 30: - print("skip too short audio:", filename) - return None - else: - return c, f0, spec, audio_norm, uv, ppg + return c, f0, spec, audio_norm, uv, ppg def random_slice(self, c, f0, spec, audio_norm, uv, ppg): if spec.shape[1] > 800: diff --git a/preprocess_f0_hubert.py b/preprocess_f0_hubert.py index 9ff2339..1f9c176 100644 --- a/preprocess_f0_hubert.py +++ b/preprocess_f0_hubert.py @@ -81,7 +81,11 @@ def parallel_process(filenames, num_processes, f0p, device): with open("/workspace/vc_train.csv", "r") as f: for line in f: file_path = line.split("|")[0] - wav_paths.append(file_path.strip()) + soft_path = file_path.replace(".wav", ".soft.pt") + f0_path = file_path.replace(".wav", ".rmvpe.pt") + + if not os.path.exists(soft_path) and not os.path.exists(f0_path): + wav_paths.append(file_path.strip()) # preprocess f0 and hubert - parallel_process(wav_paths, 4, f0p, device) + parallel_process(wav_paths, 6, f0p, device)