|
from playsound import playsound |
|
from utils import setup_device |
|
from TTS.api import TTS |
|
import numpy as np |
|
import soundfile |
|
import pyaudio |
|
import wave |
|
|
|
|
|
# Audio settings shared by the functions in this module.
FRAMES_PER_BUFFER = 1_000  # frames per buffer
FORMAT = pyaudio.paInt16   # PyAudio sample-format constant (16-bit integer)
CHANNELS = 1               # channel count written to output WAV headers
RATE = 23_500              # frame rate (Hz) written to output WAV headers
|
|
|
|
|
# Resolve the target device once at import time via the project helper
# (presumably a CPU/GPU selection -- see utils.setup_device).
device = setup_device()

# Load the "jenny" English TTS model, then move it onto the chosen device.
tts = TTS("tts_models/en/jenny/jenny")
tts = tts.to(device)
|
|
|
def add_echo(audio_file, output_file):
    """Mix a short, attenuated echo into a WAV file.

    The input file is first re-saved in place through ``soundfile`` and then
    read back with ``wave``; its frames are interpreted as 16-bit integers.
    A copy of the signal delayed by 20 ms and scaled by 0.4 is added to the
    original. The sum is saturated to the int16 range (rather than wrapping
    around on overflow) and written to ``output_file``.

    Args:
        audio_file: Path of the WAV file to process. It is rewritten in
            place before being read.
        output_file: Path of the WAV file to write; may be the same path as
            ``audio_file``.

    Returns:
        None. The result is written to ``output_file``.
    """
    # Round-trip the file through soundfile before opening it with `wave`.
    # NOTE(review): presumably this normalises the encoding to PCM that the
    # `wave` module can parse -- confirm against the TTS output format.
    data, samplerate = soundfile.read(audio_file)
    soundfile.write(audio_file, data, samplerate)

    # The context manager guarantees the handle is closed even on error.
    with wave.open(audio_file, 'rb') as wav_in:
        sample_freq = wav_in.getframerate()
        signal_wave = wav_in.readframes(wav_in.getnframes())

    signal = np.frombuffer(signal_wave, dtype=np.int16)
    n_samples = len(signal)

    echo_gain = 0.4
    echo_delay = int(0.02 * sample_freq)  # 20 ms expressed in samples

    # Build the delayed copy in int32 so the mix below cannot wrap around.
    # Assigning the float product into an integer array truncates toward
    # zero, exactly as the previous int16 buffer did.
    echo = np.zeros(n_samples, dtype=np.int32)
    if echo_delay < n_samples:
        echo[echo_delay:] = signal[:n_samples - echo_delay] * echo_gain

    # int16 + int16 silently overflows on loud passages; sum in int32 and
    # saturate to the valid sample range instead.
    mixed = signal.astype(np.int32) + echo
    limits = np.iinfo(np.int16)
    output = np.clip(mixed, limits.min, limits.max).astype(np.int16)

    with wave.open(output_file, "wb") as out_file:
        out_file.setnchannels(CHANNELS)
        # Module-level helper: no PyAudio instance has to be created (and
        # later terminated) just to look up the sample width.
        out_file.setsampwidth(pyaudio.get_sample_size(FORMAT))
        # NOTE(review): the header uses the module-level RATE and CHANNELS,
        # not the input file's own values. If RATE differs from the input
        # frame rate, playback speed and pitch change -- confirm this is
        # intended.
        out_file.setframerate(RATE)
        out_file.writeframes(output.tobytes())
|
|
|
|
|
def speak(text):
    """Synthesize ``text`` to speech, add echo to it, and play it.

    The audio is rendered to ``./database/audio.wav``, the recognition
    transcript file is emptied, the echo effect is applied to the audio
    file in place twice, and the result is played back.

    Args:
        text: The sentence to be spoken.
    """
    audio_file = "./database/audio.wav"

    tts.tts_to_file(text=text, file_path=audio_file)

    # Opening in write mode truncates the file; writing '' leaves it empty.
    with open("./database/recognition.txt", 'w') as transcript:
        transcript.write('')

    # Two in-place passes of the echo effect over the same file.
    for _ in range(2):
        add_echo(audio_file, audio_file)

    playsound(audio_file)
|
|
|
# Manual smoke test: speak a greeting when the module is run as a script.
if __name__ == "__main__":
    greeting = "Hello! I am CRYSTAL! How can I help you today?"
    speak(greeting)
|
|