Spaces:

TIMBOVILL
/

ApplioTest

Runtime error

App Files Files Community

TIMBOVILL committed on Jan 20

Commit

5cf4082

•

1 Parent(s): ea76d52

Create tabs/inference/inference.py

Browse files

Files changed (1) hide show

tabs/inference/inference.py +437 -0

tabs/inference/inference.py ADDED Viewed

	@@ -0,0 +1,437 @@

+import os, sys
+import gradio as gr
+import regex as re
+import shutil
+import datetime
+import random
+from core import (
+    run_infer_script,
+    run_batch_infer_script,
+)
+from assets.i18n.i18n import I18nAuto
+# Translator applied to every user-facing label in this module.
+i18n = I18nAuto()
+
+# The current working directory is used as the project root: it is appended to
+# the import path and is the base of the model and audio folders.
+# NOTE(review): the append runs after the `core` / `assets` imports above have
+# already executed, so it cannot help resolve those two imports.
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+model_root = os.path.join(now_dir, "logs")
+audio_root = os.path.join(now_dir, "assets", "audios")
+
+# Audio file extensions (without the leading dot) accepted as inference input.
+sup_audioext = {
+    "wav",
+    "mp3",
+    "flac",
+    "ogg",
+    "opus",
+    "m4a",
+    "mp4",
+    "aac",
+    "alac",
+    "wma",
+    "aiff",
+    "webm",
+    "ac3",
+}
+
+# Dotted form for str.endswith(). Without the dot, any file whose name merely
+# ends in those letters (e.g. "notes_wav") would be listed as audio.
+_audio_suffixes = tuple(f".{ext}" for ext in sup_audioext)
+
+# Voice model weights found anywhere below model_root (start-up snapshot).
+names = [
+    os.path.join(root, file)
+    for root, _, files in os.walk(model_root, topdown=False)
+    for file in files
+    if file.endswith((".pth", ".onnx"))
+]
+
+# Index files below model_root, skipping names that contain "trained".
+indexes_list = [
+    os.path.join(root, name)
+    for root, _, files in os.walk(model_root, topdown=False)
+    for name in files
+    if name.endswith(".index") and "trained" not in name
+]
+
+# Input audios located directly in audio_root: sub-folders are ignored and so
+# are files whose name contains "_output".
+audio_paths = [
+    os.path.join(root, name)
+    for root, _, files in os.walk(audio_root, topdown=False)
+    for name in files
+    if root == audio_root
+    and name.endswith(_audio_suffixes)
+    and "_output" not in name
+]
+def output_path_fn(input_audio_path):
+    """Derive the default output path for a converted audio file.
+
+    The result sits in the same directory as the input and is named
+    "<input file name without its extension>_output.wav".
+
+    Args:
+        input_audio_path: Path of the audio chosen for conversion. A falsy
+            value (None or "") is tolerated.
+
+    Returns:
+        The derived output path, or "" when no input path was given.
+    """
+    # This function is wired to the audio dropdown's change event; with no
+    # selection there is nothing to derive, and os.path.basename(None) would
+    # raise TypeError.
+    if not input_audio_path:
+        return ""
+    directory, file_name = os.path.split(input_audio_path)
+    stem = os.path.splitext(file_name)[0]
+    return os.path.join(directory, stem + "_output.wav")
+def change_choices():
+    """Rescan the model and audio folders and refresh the three dropdowns.
+
+    The module-level ``names``, ``indexes_list`` and ``audio_paths`` lists are
+    updated in place too, so code that consults them (``match_index`` checks
+    ``indexes_list``) also sees files added after start-up.
+
+    Returns:
+        A 3-tuple of Gradio update dictionaries holding the sorted choices for
+        the model, index and audio dropdowns, in that order.
+    """
+    # Dotted suffixes: a bare "wav" would also match a name like "notes_wav".
+    audio_suffixes = tuple(f".{ext}" for ext in sup_audioext)
+
+    # A single walk over model_root collects both weights and index files.
+    found_models = []
+    found_indexes = []
+    for root, _, files in os.walk(model_root, topdown=False):
+        for file in files:
+            if file.endswith((".pth", ".onnx")):
+                found_models.append(os.path.join(root, file))
+            elif file.endswith(".index") and "trained" not in file:
+                found_indexes.append(os.path.join(root, file))
+
+    # Only files directly in audio_root count; generated "_output" files are
+    # not offered as inputs.
+    found_audios = [
+        os.path.join(root, name)
+        for root, _, files in os.walk(audio_root, topdown=False)
+        for name in files
+        if root == audio_root
+        and name.endswith(audio_suffixes)
+        and "_output" not in name
+    ]
+
+    # Slice assignment mutates the module-level lists instead of shadowing
+    # them with locals, which is what left them stale before.
+    names[:] = found_models
+    indexes_list[:] = found_indexes
+    audio_paths[:] = found_audios
+
+    return (
+        {"choices": sorted(found_models), "__type__": "update"},
+        {"choices": sorted(found_indexes), "__type__": "update"},
+        {"choices": sorted(found_audios), "__type__": "update"},
+    )
+def get_indexes():
+    """List the index files available below the model folder.
+
+    Returns:
+        The paths of all ".index" files under ``model_root`` whose file name
+        does not contain "trained", or an empty string when none exist.
+    """
+    found = []
+    for folder, _, entries in os.walk(model_root):
+        for entry in entries:
+            if "trained" in entry or not entry.endswith(".index"):
+                continue
+            found.append(os.path.join(folder, entry))
+    # Callers receive "" rather than an empty list when nothing was found.
+    return found or ""
+def match_index(model_file: str) -> str:
+    """Pick the index file that best matches a voice model.
+
+    Candidates are ".index" files (names containing "trained" are skipped)
+    taken from two places: the folder ``model_root/<model name>``, where every
+    index counts as a match, and ``model_root`` itself, where the file name
+    must contain the model name (case-insensitive). Among the candidates,
+    names without spaces win, then the largest file.
+
+    Args:
+        model_file: Path or file name of the selected ".pth" / ".onnx" model.
+            A falsy value (None or "") is tolerated.
+
+    Returns:
+        The path of the best matching index file, or "" when there is none.
+    """
+    if not model_file:
+        return ""
+
+    # Strip the weight extension at the END of the name only. The previous
+    # pattern r"\.pth|\.onnx$" anchored just the second alternative and removed
+    # ".pth" anywhere in the path.
+    model_file_name = os.path.basename(re.sub(r"\.(?:pth|onnx)$", "", model_file))
+
+    # Names ending in _e<digits>_s<digits> have that suffix dropped to obtain
+    # the base model name.
+    if re.match(r".+_e\d+_s\d+$", model_file_name):
+        base_model_name = model_file_name.rsplit("_", 2)[0]
+    else:
+        base_model_name = model_file_name
+
+    # An empty name is a substring of every file name and would make
+    # model_root itself look like the model's own folder.
+    if not base_model_name:
+        return ""
+
+    sid_directory = os.path.join(model_root, base_model_name)
+    # Folders that do not exist are skipped instead of letting os.listdir raise.
+    directories_to_search = [
+        directory
+        for directory in (sid_directory, model_root)
+        if os.path.isdir(directory)
+    ]
+
+    # base_model_name is a prefix of model_file_name, so any file name that
+    # contains the full name also contains the base name: one test suffices.
+    needle = base_model_name.lower()
+
+    # Candidates come straight from the directory listing rather than being
+    # cross-checked against the start-up snapshot in indexes_list, so index
+    # files added later can still be matched.
+    candidates = []
+    for directory in directories_to_search:
+        in_model_folder = directory == sid_directory
+        for filename in os.listdir(directory):
+            if not filename.endswith(".index") or "trained" in filename:
+                continue
+            if in_model_folder or needle in filename.lower():
+                index_path = os.path.join(directory, filename)
+                candidates.append(
+                    (" " not in filename, os.path.getsize(index_path), index_path)
+                )
+
+    if not candidates:
+        return ""
+    # Prefer names without spaces, then the largest file; max() keeps the
+    # first candidate on ties, like the stable sort it replaces.
+    return max(candidates, key=lambda candidate: candidate[:2])[2]
+def save_to_wav(record_button):
+    """Move a finished recording into the audio folder under a timestamp name.
+
+    Args:
+        record_button: File path of the recorded audio, or None when there is
+            no recording.
+
+    Returns:
+        A 2-tuple for the audio dropdown and the output-path textbox: the new
+        audio path and its derived output path, or two empty Gradio update
+        dictionaries when ``record_button`` is None.
+    """
+    if record_button is None:
+        # The event declares two outputs, so two values are returned; update
+        # dictionaries without fields carry no change for either component.
+        return {"__type__": "update"}, {"__type__": "update"}
+
+    # The audio folder may not exist yet on a fresh checkout.
+    os.makedirs(audio_root, exist_ok=True)
+    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav"
+    target_path = os.path.join(audio_root, new_name)
+    shutil.move(record_button, target_path)
+    return target_path, output_path_fn(target_path)
+def save_to_wav2(upload_audio):
+    """Copy an uploaded audio file into the audio folder, keeping its name.
+
+    An existing file with the same name in the audio folder is replaced.
+
+    Args:
+        upload_audio: File path of the uploaded audio, or None when there is
+            no upload.
+
+    Returns:
+        A 2-tuple for the audio dropdown and the output-path textbox: the
+        copied file's path and its derived output path, or two empty Gradio
+        update dictionaries when ``upload_audio`` is None.
+    """
+    if upload_audio is None:
+        # Same two-output contract as save_to_wav: return updates that carry
+        # no change.
+        return {"__type__": "update"}, {"__type__": "update"}
+
+    # The audio folder may not exist yet on a fresh checkout.
+    os.makedirs(audio_root, exist_ok=True)
+    target_path = os.path.join(audio_root, os.path.basename(upload_audio))
+
+    # When the upload already is the target file, removing the target first
+    # would delete the only copy; in that case there is nothing to do.
+    if os.path.abspath(upload_audio) != os.path.abspath(target_path):
+        if os.path.exists(target_path):
+            os.remove(target_path)
+        shutil.copy(upload_audio, target_path)
+    return target_path, output_path_fn(target_path)
+def delete_outputs():
+    """Delete generated "_output" audio files from the audio folder tree.
+
+    Every file below ``audio_root`` (sub-folders included) whose name contains
+    "_output" and ends in one of the supported audio extensions is removed,
+    then a Gradio info message is shown.
+    """
+    # Require the dot before the extension: this function deletes files, so a
+    # name that merely ends in the letters (e.g. "x_outputwav") must not match.
+    audio_suffixes = tuple(f".{ext}" for ext in sup_audioext)
+    for root, _, files in os.walk(audio_root, topdown=False):
+        for name in files:
+            if "_output" in name and name.endswith(audio_suffixes):
+                os.remove(os.path.join(root, name))
+    gr.Info("Outputs cleared!")
+# Inference tab
+def inference_tab():
+    """Build the inference UI and wire its events.
+
+    Creates the shared model / index pickers, a "Single" tab (one audio file)
+    and a "Batch" tab (a whole folder), then connects the buttons to
+    ``run_infer_script`` and ``run_batch_infer_script``.
+
+    The function only creates components; it opens no ``gr.Blocks`` of its own,
+    so it is presumably meant to be called inside an open Gradio layout
+    context (TODO confirm against the caller).
+    """
+    default_weight = random.choice(names) if names else ""
+    with gr.Row():
+        with gr.Row():
+            model_file = gr.Dropdown(
+                label=i18n("Voice Model"),
+                choices=sorted(names),
+                interactive=True,
+                value=default_weight,
+                allow_custom_value=True,
+            )
+            # Pre-select the index that goes with the default model.
+            best_default_index_path = match_index(model_file.value)
+            index_file = gr.Dropdown(
+                label=i18n("Index File"),
+                choices=get_indexes(),
+                value=best_default_index_path,
+                interactive=True,
+                allow_custom_value=True,
+            )
+        with gr.Column():
+            refresh_button = gr.Button(i18n("Refresh"))
+            unload_button = gr.Button(i18n("Unload Voice"))
+            unload_button.click(
+                fn=lambda: ({"value": "", "__type__": "update"}),
+                inputs=[],
+                outputs=[model_file],
+            )
+            # Picking a model selects its matching index file.
+            model_file.select(
+                fn=match_index,
+                inputs=[model_file],
+                outputs=[index_file],
+            )
+    # Single inference tab
+    with gr.Tab(i18n("Single")):
+        with gr.Row():
+            with gr.Column():
+                upload_audio = gr.Audio(
+                    label=i18n("Upload Audio"), type="filepath", editable=False
+                )
+                with gr.Row():
+                    audio = gr.Dropdown(
+                        label=i18n("Select Audio"),
+                        choices=sorted(audio_paths),
+                        value=audio_paths[0] if audio_paths else "",
+                        interactive=True,
+                        allow_custom_value=True,
+                    )
+        with gr.Accordion(i18n("Advanced Settings"), open=False):
+            with gr.Column():
+                clear_outputs = gr.Button(
+                    i18n("Clear Outputs (Deletes all audios in assets/audios)")
+                )
+                output_path = gr.Textbox(
+                    label=i18n("Output Path"),
+                    placeholder=i18n("Enter output path"),
+                    value=output_path_fn(audio_paths[0])
+                    if audio_paths
+                    else os.path.join(now_dir, "assets", "audios", "output.wav"),
+                    interactive=True,
+                )
+                split_audio = gr.Checkbox(
+                    label=i18n("Split Audio"),
+                    visible=True,
+                    value=False,
+                    interactive=True,
+                )
+                pitch = gr.Slider(-12, 12, 0, label=i18n("Pitch"))
+                filter_radius = gr.Slider(
+                    minimum=0,
+                    maximum=7,
+                    label=i18n(
+                        "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
+                    ),
+                    value=3,
+                    step=1,
+                    interactive=True,
+                )
+                index_rate = gr.Slider(
+                    minimum=0,
+                    maximum=1,
+                    label=i18n("Search Feature Ratio"),
+                    value=0.75,
+                    interactive=True,
+                )
+                hop_length = gr.Slider(
+                    minimum=1,
+                    maximum=512,
+                    step=1,
+                    label=i18n("Hop Length"),
+                    value=128,
+                    interactive=True,
+                )
+            with gr.Column():
+                f0method = gr.Radio(
+                    label=i18n("Pitch extraction algorithm"),
+                    choices=[
+                        "pm",
+                        "harvest",
+                        "dio",
+                        "crepe",
+                        "crepe-tiny",
+                        "rmvpe",
+                    ],
+                    value="rmvpe",
+                    interactive=True,
+                )
+        convert_button1 = gr.Button(i18n("Convert"))
+        with gr.Row():  # Defines output info + output audio download after conversion
+            vc_output1 = gr.Textbox(label=i18n("Output Information"))
+            vc_output2 = gr.Audio(label=i18n("Export Audio"))
+    # Batch inference tab
+    with gr.Tab(i18n("Batch")):
+        with gr.Row():
+            with gr.Column():
+                input_folder_batch = gr.Textbox(
+                    label=i18n("Input Folder"),
+                    placeholder=i18n("Enter input path"),
+                    value=os.path.join(now_dir, "assets", "audios"),
+                    interactive=True,
+                )
+                output_folder_batch = gr.Textbox(
+                    label=i18n("Output Folder"),
+                    placeholder=i18n("Enter output path"),
+                    value=os.path.join(now_dir, "assets", "audios"),
+                    interactive=True,
+                )
+        with gr.Accordion(i18n("Advanced Settings"), open=False):
+            with gr.Column():
+                # Named separately from the Single tab's button: reusing the
+                # name `clear_outputs` left the first button without a handler.
+                clear_outputs_batch = gr.Button(
+                    i18n("Clear Outputs (Deletes all audios in assets/audios)")
+                )
+                pitch_batch = gr.Slider(-12, 12, 0, label=i18n("Pitch"))
+                filter_radius_batch = gr.Slider(
+                    minimum=0,
+                    maximum=7,
+                    label=i18n(
+                        "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
+                    ),
+                    value=3,
+                    step=1,
+                    interactive=True,
+                )
+                index_rate_batch = gr.Slider(
+                    minimum=0,
+                    maximum=1,
+                    label=i18n("Search Feature Ratio"),
+                    value=0.75,
+                    interactive=True,
+                )
+                hop_length_batch = gr.Slider(
+                    minimum=1,
+                    maximum=512,
+                    step=1,
+                    label=i18n("Hop Length"),
+                    value=128,
+                    interactive=True,
+                )
+            with gr.Column():
+                f0method_batch = gr.Radio(
+                    label=i18n("Pitch extraction algorithm"),
+                    choices=[
+                        "pm",
+                        "harvest",
+                        "dio",
+                        "crepe",
+                        "crepe-tiny",
+                        "rmvpe",
+                    ],
+                    value="rmvpe",
+                    interactive=True,
+                )
+        convert_button2 = gr.Button(i18n("Convert"))
+        with gr.Row():  # Defines output info + output audio download after conversion
+            vc_output3 = gr.Textbox(label=i18n("Output Information"))
+    # Event wiring
+    refresh_button.click(
+        fn=change_choices,
+        inputs=[],
+        outputs=[model_file, index_file, audio],
+    )
+    # Keep the suggested output path in step with the selected audio.
+    audio.change(
+        fn=output_path_fn,
+        inputs=[audio],
+        outputs=[output_path],
+    )
+    upload_audio.upload(
+        fn=save_to_wav2,
+        inputs=[upload_audio],
+        outputs=[audio, output_path],
+    )
+    upload_audio.stop_recording(
+        fn=save_to_wav,
+        inputs=[upload_audio],
+        outputs=[audio, output_path],
+    )
+    # Each tab has its own "Clear Outputs" button; both run the same cleanup.
+    for clear_button in (clear_outputs, clear_outputs_batch):
+        clear_button.click(
+            fn=delete_outputs,
+            inputs=[],
+            outputs=[],
+        )
+    convert_button1.click(
+        fn=run_infer_script,
+        inputs=[
+            pitch,
+            filter_radius,
+            index_rate,
+            hop_length,
+            f0method,
+            audio,
+            output_path,
+            model_file,
+            index_file,
+            split_audio,
+        ],
+        outputs=[vc_output1, vc_output2],
+    )
+    convert_button2.click(
+        fn=run_batch_infer_script,
+        inputs=[
+            pitch_batch,
+            filter_radius_batch,
+            index_rate_batch,
+            hop_length_batch,
+            f0method_batch,
+            input_folder_batch,
+            output_folder_batch,
+            model_file,
+            index_file,
+        ],
+        outputs=[vc_output3],
+    )