"""Speak text aloud with a short echo effect.

Speech is synthesised with the ``TTS`` package ("jenny" model), written to a
WAV file, post-processed in place by :func:`add_echo`, and played back with
``playsound``.
"""

import wave

import numpy as np
import pyaudio
import soundfile
from playsound import playsound
from TTS.api import TTS

from utils import setup_device

# Not referenced by the code visible in this file; kept for any importer.
FRAMES_PER_BUFFER = 1000
# Sample format of the WAV written by add_echo (16-bit signed integers).
FORMAT = pyaudio.paInt16
# Channel count written to the output WAV header.
CHANNELS = 1
# Frame rate written to the output WAV header, regardless of the input's rate.
# NOTE(review): if the input rate differs, playback speed/pitch changes --
# presumably an intentional voice effect; confirm before changing.
RATE = 23500

# Module-level setup: runs once at import time.
device = setup_device()
tts = TTS("tts_models/en/jenny/jenny").to(device)


def _mix_echo(signal, delay_samples, gain):
    """Return *signal* plus a delayed, attenuated copy of itself, as int16.

    The sum is computed in int32 and clipped to the int16 range so that loud
    passages saturate instead of wrapping around.
    """
    length = len(signal)
    delayed = np.zeros(length, dtype=np.int32)
    # A delay at or beyond the clip length leaves no echo inside the output.
    if delay_samples < length:
        scaled = signal[:length - delay_samples] * gain
        delayed[delay_samples:] = scaled.astype(np.int32)

    mixed = signal.astype(np.int32) + delayed
    limits = np.iinfo(np.int16)
    return np.clip(mixed, limits.min, limits.max).astype(np.int16)


def add_echo(audio_file, output_file, *, echo_gain=0.4,
             echo_delay_seconds=0.02):
    """Add a single echo to a WAV file.

    Args:
        audio_file: Path of the WAV file to read. It is re-encoded in place
            before processing (see the note in the body).
        output_file: Path of the WAV file to write; may equal ``audio_file``.
        echo_gain: Amplitude factor applied to the delayed copy.
        echo_delay_seconds: Echo delay in seconds, converted to samples using
            the input file's frame rate.

    Raises:
        ValueError: If ``echo_delay_seconds`` is negative.

    The output header always uses ``CHANNELS``, ``FORMAT`` and ``RATE``.
    """
    if echo_delay_seconds < 0:
        raise ValueError("echo_delay_seconds must be non-negative")

    # Round-trip the file through soundfile so the `wave` module can read it.
    # NOTE(review): relies on soundfile's default WAV subtype being 16-bit
    # PCM, which the int16 decoding below assumes -- confirm.
    data, samplerate = soundfile.read(audio_file)
    soundfile.write(audio_file, data, samplerate)

    with wave.open(audio_file, "rb") as source:
        sample_freq = source.getframerate()
        raw_frames = source.readframes(source.getnframes())

    signal = np.frombuffer(raw_frames, dtype=np.int16)
    echo_delay = int(echo_delay_seconds * sample_freq)
    output = _mix_echo(signal, echo_delay, echo_gain)

    with wave.open(output_file, "wb") as out_file:
        out_file.setnchannels(CHANNELS)
        # Module-level helper: no PortAudio instance has to be created (and
        # later terminated) just to look up the sample width.
        out_file.setsampwidth(pyaudio.get_sample_size(FORMAT))
        out_file.setframerate(RATE)
        out_file.writeframes(output.tobytes())


def speak(text):
    """Synthesise *text*, apply the echo effect twice, and play the result.

    Also truncates ``./database/recognition.txt`` to an empty file before the
    audio is post-processed and played.
    """
    audio_file = "./database/audio.wav"
    tts.tts_to_file(text=text, file_path=audio_file)

    with open("./database/recognition.txt", 'w') as recognition:
        recognition.write('')

    # Two in-place passes: the second pass echoes the already-echoed audio.
    add_echo(audio_file, audio_file)
    add_echo(audio_file, audio_file)
    playsound(audio_file)


if __name__ == "__main__":
    speak("Hello! I am CRYSTAL! How can I help you today?")