rewrite oneclick_train

2026-03-07 14:24:21 +00:00 · 2023-08-28 23:56:39 +09:00
parent 740625fd2b
commit 6e14c7b5f5
11 changed files with 21 additions and 270 deletions
--- a/infer-web.py
+++ b/infer-web.py
@@ -705,280 +705,21 @@ def train1key(
        infos.append(strr)
        return "\n".join(infos)

-    model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
-    preprocess_log_path = "%s/preprocess.log" % model_log_dir
-    extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
-    gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
-    feature_dir = (
-        "%s/3_feature256" % model_log_dir
-        if version19 == "v1"
-        else "%s/3_feature768" % model_log_dir
-    )
-
-    os.makedirs(model_log_dir, exist_ok=True)
-    #########step1:处理数据
-    open(preprocess_log_path, "w").close()
-    cmd = (
-        get_quoted_python_cmd()
-        + ' trainset_preprocess_pipeline_print.py "%s" %s %s "%s" '
-        % (trainset_dir4, sr_dict[sr2], np7, model_log_dir)
-        + str(config.noparallel)
-    )
+    ####### step1:处理数据
    yield get_info_str(i18n("step1:正在处理数据"))
-    yield get_info_str(cmd)
-    p = Popen(cmd, shell=True)
-    p.wait()
-    with open(preprocess_log_path, "r") as f:
-        print(f.read())
-    #########step2a:提取音高
-    open(extract_f0_feature_log_path, "w")
-    if if_f0_3:
-        yield get_info_str("step2a:正在提取音高")
-        if f0method8 != "rmvpe_gpu":
-            cmd = config.python_cmd + ' extract_f0_print.py "%s" %s %s' % (
-                model_log_dir,
-                np7,
-                f0method8,
-            )
-            yield get_info_str(cmd)
-            p = Popen(cmd, shell=True, cwd=now_dir)
-            p.wait()
-        else:
-            if gpus_rmvpe != "-":
-                gpus_rmvpe = gpus_rmvpe.split("-")
-                leng = len(gpus_rmvpe)
-                ps = []
-                for idx, n_g in enumerate(gpus_rmvpe):
-                    cmd = (
-                        get_quoted_python_cmd()
-                        + ' extract_f0_rmvpe.py %s %s %s "%s" %s '
-                        % (
-                            leng,
-                            idx,
-                            n_g,
-                            model_log_dir,
-                            config.is_half,
-                        )
-                    )
-                    yield get_info_str(cmd)
-                    p = Popen(
-                        cmd, shell=True, cwd=now_dir
-                    )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
-                    ps.append(p)
-                for p in ps:
-                    p.wait()
-            else:  # dml
-                cmd = config.python_cmd + ' extract_f0_rmvpe_dml.py "%s" ' % (
-                    model_log_dir
-                )
-                yield get_info_str(cmd)
-                p = Popen(
-                    cmd, shell=True, cwd=now_dir
-                )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
-                p.wait()
-        with open(extract_f0_feature_log_path, "r") as f:
-            print(f.read())
-    else:
-        yield get_info_str(i18n("step2a:无需提取音高"))
-    #######step2b:提取特征
-    yield get_info_str(i18n("step2b:正在提取特征"))
-    gpus = gpus16.split("-")
-    leng = len(gpus)
-    ps = []
-    for idx, n_g in enumerate(gpus):
-        cmd = (
-            get_quoted_python_cmd()
-            + ' extract_feature_print.py %s %s %s %s "%s" %s'
-            % (
-                config.device,
-                leng,
-                idx,
-                n_g,
-                model_log_dir,
-                version19,
-            )
-        )
-        yield get_info_str(cmd)
-        p = Popen(
-            cmd, shell=True, cwd=now_dir
-        )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
-        ps.append(p)
-    for p in ps:
-        p.wait()
-    with open(extract_f0_feature_log_path, "r") as f:
-        print(f.read())
-    #######step3a:训练模型
+    [get_info_str(_) for _ in preprocess_dataset(trainset_dir4, exp_dir1, sr2, np7)]
+
+    ####### step2:提取音高 & 提取特征
+    yield get_info_str(i18n("step2:正在提取音高&正在提取特征"))
+    [get_info_str(_) for _ in extract_f0_feature(gpus16, np7, f0method8, if_f0_3, exp_dir1, version19, gpus_rmvpe)]
+
+    ####### step3a:训练模型
    yield get_info_str(i18n("step3a:正在训练模型"))
-    # 生成filelist
-    if if_f0_3:
-        f0_dir = "%s/2a_f0" % model_log_dir
-        f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
-        names = (
-            set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
-            & set([name.split(".")[0] for name in os.listdir(feature_dir)])
-            & set([name.split(".")[0] for name in os.listdir(f0_dir)])
-            & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
-        )
-    else:
-        names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
-            [name.split(".")[0] for name in os.listdir(feature_dir)]
-        )
-    opt = []
-    for name in names:
-        if if_f0_3:
-            opt.append(
-                "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
-                % (
-                    gt_wavs_dir.replace("\\", "\\\\"),
-                    name,
-                    feature_dir.replace("\\", "\\\\"),
-                    name,
-                    f0_dir.replace("\\", "\\\\"),
-                    name,
-                    f0nsf_dir.replace("\\", "\\\\"),
-                    name,
-                    spk_id5,
-                )
-            )
-        else:
-            opt.append(
-                "%s/%s.wav|%s/%s.npy|%s"
-                % (
-                    gt_wavs_dir.replace("\\", "\\\\"),
-                    name,
-                    feature_dir.replace("\\", "\\\\"),
-                    name,
-                    spk_id5,
-                )
-            )
-    fea_dim = 256 if version19 == "v1" else 768
-    if if_f0_3:
-        for _ in range(2):
-            opt.append(
-                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
-                % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
-            )
-    else:
-        for _ in range(2):
-            opt.append(
-                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
-                % (now_dir, sr2, now_dir, fea_dim, spk_id5)
-            )
-    shuffle(opt)
-    with open("%s/filelist.txt" % model_log_dir, "w") as f:
-        f.write("\n".join(opt))
-    yield get_info_str("write filelist done")
-    if gpus16:
-        cmd = get_quoted_python_cmd() + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' % (
-            exp_dir1,
-            sr2,
-            1 if if_f0_3 else 0,
-            batch_size12,
-            gpus16,
-            total_epoch11,
-            save_epoch10,
-            "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
-            "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
-            1 if if_save_latest13 == i18n("是") else 0,
-            1 if if_cache_gpu17 == i18n("是") else 0,
-            1 if if_save_every_weights18 == i18n("是") else 0,
-            version19,
-        )
-    else:
-        cmd = (
-            config.python_cmd
-            + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
-            % (
-                exp_dir1,
-                sr2,
-                1 if if_f0_3 else 0,
-                batch_size12,
-                total_epoch11,
-                save_epoch10,
-                "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
-                "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
-                1 if if_save_latest13 == i18n("是") else 0,
-                1 if if_cache_gpu17 == i18n("是") else 0,
-                1 if if_save_every_weights18 == i18n("是") else 0,
-                version19,
-            )
-        )
-    yield get_info_str(cmd)
-    p = Popen(cmd, shell=True, cwd=now_dir)
-    p.wait()
+    click_train(exp_dir1, sr2, if_f0_3, spk_id5, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17, if_save_every_weights18, version19)
    yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
-    #######step3b:训练索引
-    npys = []
-    listdir_res = list(os.listdir(feature_dir))
-    for name in sorted(listdir_res):
-        phone = np.load("%s/%s" % (feature_dir, name))
-        npys.append(phone)
-    big_npy = np.concatenate(npys, 0)

-    big_npy_idx = np.arange(big_npy.shape[0])
-    np.random.shuffle(big_npy_idx)
-    big_npy = big_npy[big_npy_idx]
-
-    if big_npy.shape[0] > 2e5:
-        # if(1):
-        info = "Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]
-        print(info)
-        yield get_info_str(info)
-        try:
-            big_npy = (
-                MiniBatchKMeans(
-                    n_clusters=10000,
-                    verbose=True,
-                    batch_size=256 * config.n_cpu,
-                    compute_labels=False,
-                    init="random",
-                )
-                .fit(big_npy)
-                .cluster_centers_
-            )
-        except:
-            info = traceback.format_exc()
-            print(info)
-            yield get_info_str(info)
-
-    np.save("%s/total_fea.npy" % model_log_dir, big_npy)
-    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
-    yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
-    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
-    yield get_info_str("training index")
-    index_ivf = faiss.extract_index_ivf(index)  #
-    index_ivf.nprobe = 1
-    index.train(big_npy)
-    faiss.write_index(
-        index,
-        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
-        % (
-            model_log_dir.replace(now_dir + "/", ""),
-            n_ivf,
-            index_ivf.nprobe,
-            exp_dir1,
-            version19,
-        ),
-    )
-    yield get_info_str("adding index")
-    batch_size_add = 8192
-    for i in range(0, big_npy.shape[0], batch_size_add):
-        index.add(big_npy[i : i + batch_size_add])
-    faiss.write_index(
-        index,
-        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
-        % (
-            model_log_dir.replace(now_dir + "/", ""),
-            n_ivf,
-            index_ivf.nprobe,
-            exp_dir1,
-            version19,
-        ),
-    )
-    yield get_info_str(
-        "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index"
-        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
-    )
+    ####### step3b:训练索引
+    [get_info_str(_) for _ in train_index(exp_dir1, version19)]
    yield get_info_str(i18n("全流程结束！"))