Bark-with-Voice-Cloning

Runtime error

App Files Files Community

kevinwang676 committed on Jun 13, 2023

Commit

627c134

•

1 Parent(s): 79a08d6

Upload 6 files

Browse files

Files changed (6) hide show

Dockerfile (2).txt +38 -0
config (2).yaml +8 -0
gitignore (3).txt +14 -0
pyproject (2).toml +60 -0
setup (2).py +3 -0
swap_voice (2).py +62 -0

Dockerfile (2).txt ADDED Viewed

	@@ -0,0 +1,38 @@

+FROM debian:stable
+# Install system packages
+RUN apt update && apt install -y git pip
+# Create non-root user
+RUN useradd -m -d /bark bark
+# Run as new user
+USER bark
+WORKDIR /bark
+# Clone git repo
+RUN git clone https://github.com/C0untFloyd/bark-gui
+# Switch to git directory
+WORKDIR /bark/bark-gui
+# Append pip bin path to PATH
+ENV PATH=$PATH:/bark/.local/bin
+# Install dependencies
+RUN pip install .
+RUN pip install -r requirements.txt
+# Listen on all addresses, since we are in a container.
+RUN sed -i "s/server_name: ''/server_name: 0.0.0.0/g" ./config.yaml
+# Suggested volumes
+VOLUME /bark/bark-gui/assets/prompts/custom
+VOLUME /bark/bark-gui/models
+VOLUME /bark/.cache/huggingface/hub
+# Default port for web-ui
+EXPOSE 7860/tcp
+# Start script
+CMD python3 webui.py

config (2).yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+input_text_desired_length: 110
+input_text_max_length: 170
+selected_theme: JohnSmith9982/small_and_pretty
+server_name: ''
+server_port: 0
+server_share: false
+silence_between_sentences: 250
+silence_between_speakers: 500

gitignore (3).txt ADDED Viewed

	@@ -0,0 +1,14 @@

+__pycache__/
+/outputs
+/speakers
+.vs
+*.npz
+*.wav
+*.npy
+.vs/
+/models
+/bark_ui_enhanced.egg-info
+/build/lib/bark
+*.pth
+*.pt
+*.zip

pyproject (2).toml ADDED Viewed

	@@ -0,0 +1,60 @@

+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "bark-ui-enhanced"
+version = "0.7.0"
+description = "Bark text to audio model with addition features and a Web UI"
+readme = "README.md"
+requires-python = ">=3.8"
+authors =  [
+    {name = "Suno Inc (original Bark)", email = "hello@suno.ai"},
+    {name = "Count Floyd"},
+]
+# MIT License
+license = {file = "LICENSE"}
+dependencies = [
+    "boto3",
+    "encodec",
+    "funcy",
+    "huggingface-hub>=0.14.1",
+    "numpy",
+    "scipy",
+    "tokenizers",
+    "torch",
+    "tqdm",
+    "transformers",
+]
+[project.urls]
+source = "https://github.com/C0untFloyd/bark-gui"
+[project.optional-dependencies]
+dev = [
+    "bandit",
+    "black",
+    "codecov",
+    "flake8",
+    "hypothesis>=6.14,<7",
+    "isort>=5.0.0,<6",
+    "jupyter",
+    "mypy",
+    "nbconvert",
+    "nbformat",
+    "pydocstyle",
+    "pylint",
+    "pytest",
+    "pytest-cov",
+]
+[tool.setuptools]
+packages = ["bark"]
+[tool.setuptools.package-data]
+bark = ["assets/prompts/*.npz", "assets/prompts/v2/*.npz"]
+[tool.black]
+line-length = 100

setup (2).py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from setuptools import setup
2	+
3	+ setup()

swap_voice (2).py ADDED Viewed

	@@ -0,0 +1,62 @@

+from bark.generation import load_codec_model, generate_text_semantic, grab_best_device
+from bark import SAMPLE_RATE
+from encodec.utils import convert_audio
+from bark.hubert.hubert_manager import HuBERTManager
+from bark.hubert.pre_kmeans_hubert import CustomHubert
+from bark.hubert.customtokenizer import CustomTokenizer
+from bark.api import semantic_to_waveform
+from scipy.io.wavfile import write as write_wav
+from util.helper import create_filename
+from util.settings import Settings
+import torchaudio
+import torch
+import os
+import gradio
+def swap_voice_from_audio(swap_audio_filename, selected_speaker, tokenizer_lang, seed, batchcount, progress=gradio.Progress(track_tqdm=True)):
+    """Re-voice an audio file with a speaker prompt and save the result as a WAV file.
+
+    The input audio is loaded, turned into semantic tokens (HuBERT features
+    fed through the language-specific CustomTokenizer), and those tokens are
+    passed to ``semantic_to_waveform`` with ``selected_speaker`` as the
+    history prompt. The resulting audio is written to a new file.
+
+    Args:
+        swap_audio_filename: Path of the audio file, handed to ``torchaudio.load``.
+        selected_speaker: Passed unchanged as ``history_prompt`` to
+            ``semantic_to_waveform``.
+        tokenizer_lang: Language key; selects which tokenizer is installed and
+            which ``./models/hubert/{tokenizer_lang}_tokenizer.pth`` checkpoint
+            is loaded.
+        seed: Accepted but not used anywhere in this function body.
+        batchcount: Accepted but not used anywhere in this function body.
+        progress: Gradio progress callback; invoked here at 0 and 0.25.
+
+    Returns:
+        The filename returned by ``create_filename`` to which the audio was written.
+    """
+    # NOTE(review): the variable is only tested for truthiness, so ANY non-empty
+    # BARK_FORCE_CPU value (including "0" or "false") disables the GPU.
+    use_gpu = not os.environ.get("BARK_FORCE_CPU", False)
+    progress(0, desc="Loading Codec")
+    # From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer
+    # Make sure the HuBERT model and the tokenizer for this language are
+    # installed before the checkpoints are loaded from ./models/hubert below.
+    hubert_manager = HuBERTManager()
+    hubert_manager.make_sure_hubert_installed()
+    hubert_manager.make_sure_tokenizer_installed(tokenizer_lang=tokenizer_lang)
+    # From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer
+    # Load HuBERT for semantic tokens
+    # Load the HuBERT model
+    device = grab_best_device(use_gpu)
+    hubert_model = CustomHubert(checkpoint_path='./models/hubert/hubert.pt').to(device)
+    # The codec model is used below only for its sample_rate / channels attributes.
+    model = load_codec_model(use_gpu=use_gpu)
+    # Load the CustomTokenizer model
+    tokenizer = CustomTokenizer.load_from_checkpoint(f'./models/hubert/{tokenizer_lang}_tokenizer.pth').to(device)  # Automatically uses the right layers
+    progress(0.25, desc="Converting WAV")
+    # Load and pre-process the audio waveform
+    wav, sr = torchaudio.load(swap_audio_filename)
+    # Only the exactly-two-channel case is averaged here; any other channel
+    # count goes straight to convert_audio.
+    if wav.shape[0] == 2:  # Stereo to mono if needed
+        wav = wav.mean(0, keepdim=True)
+    # Convert to the codec model's sample rate and channel count.
+    wav = convert_audio(wav, sr, model.sample_rate, model.channels)
+    wav = wav.to(device)
+    # HuBERT features -> semantic tokens.
+    semantic_vectors = hubert_model.forward(wav, input_sample_hz=model.sample_rate)
+    semantic_tokens = tokenizer.get_token(semantic_vectors)
+    # Generate the waveform from the semantic tokens, passing the selected
+    # speaker as the history prompt; temperature is fixed at 0.7.
+    audio = semantic_to_waveform(
+        semantic_tokens,
+        history_prompt=selected_speaker,
+        temp=0.7,
+        silent=False,
+        output_full=False)
+    # Output location comes from settings.output_folder_path
+    # (presumably populated from config.yaml — TODO confirm in util.settings).
+    settings = Settings('config.yaml')
+    result = create_filename(settings.output_folder_path, None, "swapvoice",".wav")
+    # Written at Bark's SAMPLE_RATE constant.
+    write_wav(result, SAMPLE_RATE, audio)
+    return result