"""Record microphone audio to a WAV file and transcribe it when speech is heard.

Running this module as a script opens an input stream and loops forever,
appending captured audio to ``database/recording.wav`` and triggering a
transcription of that file whenever a chunk is louder than ``THRESHOLD``.
"""

from utils import transcribe as transcribe_api
# from SoundScribe.speakerID import find_user
import sounddevice as sd
import soundfile as sf
import numpy as np
import threading
import whisper
import time

SAMPLE_RATE = 16000      # sample rate used for both the input stream and the WAV file
CHANNELS = 1             # channel count used for both the input stream and the WAV file
BLOCKSIZE = 8000         # frames fetched by the first read of every listen() call
DURATION = 0.5           # seconds of audio fetched by the second read of every listen() call
THRESHOLD = 0.015        # mean absolute amplitude above which a chunk counts as speech
SILENT_THRESHOLD = 3     # accumulated silence (seconds) after which the recording is reset
silence_duration = 0     # running total of consecutive silence, in seconds

# Recording that is currently being appended to; replaced after long silence.
output_file = sf.SoundFile(
    'database/recording.wav', mode='w', samplerate=SAMPLE_RATE, channels=CHANNELS)
model = whisper.load_model("base")
transcription_in_progress = False  # True while a background transcription is running
queued = False                     # True when speech arrived during a running transcription


def transcribe(audio):
    """Transcribe *audio* with the module-level Whisper model and store the text.

    The text is written to ``./database/input.txt`` (minus its first
    character) unless the speaker is "Crystal". Speaker identification is
    currently disabled, so the user is always "Vatsal".

    Args:
        audio: Whatever ``model.transcribe`` accepts.

    Returns:
        A ``(text, user)`` tuple; ``text`` is the untrimmed transcription.
    """
    result = model.transcribe(audio)
    text = result['text']
    # user = find_user("database/recording.wav")
    user = "Vatsal"
    if user != "Crystal":
        with open('./database/input.txt', 'w', encoding="utf-8") as write_to:
            # NOTE(review): presumably drops the leading space Whisper emits — confirm.
            write_to.write(text[1:])
    return text, user


def transcription():
    """Transcribe ``database/recording.wav`` via ``transcribe_api`` and print it.

    Used both as a background-thread target and as a direct call from
    ``listen``. ``transcription_in_progress`` is always cleared on exit, even
    if the transcription raises, so a single failure cannot permanently block
    later transcriptions.
    """
    global transcription_in_progress
    try:
        text, user = transcribe_api('database/recording.wav')
        print("-"*100)
        print(f'Transcription: {text} from user {user}')
        print("-"*100)
    finally:
        transcription_in_progress = False


def listen(model, stream):
    """Capture one round of audio and start or queue a transcription.

    Two chunks are read from *stream* and appended to the recording. The
    loudness of the second chunk decides what happens next:

    * louder than ``THRESHOLD``: start a background transcription, or mark
      one as queued if a transcription is already running;
    * otherwise: accumulate silence; once ``SILENT_THRESHOLD`` seconds have
      built up, run any queued transcription and start a fresh recording.

    Args:
        model: Unused; kept so existing callers keep working.
        stream: Object whose ``read(frames)`` returns a ``(data, flag)`` pair.
    """
    global transcription_in_progress
    global queued
    global silence_duration
    global output_file

    audio_data, _ = stream.read(BLOCKSIZE)
    output_file.write(audio_data)
    time.sleep(0.5)
    audio_data, _ = stream.read(int(DURATION * SAMPLE_RATE))
    output_file.write(audio_data)

    # Only the most recent chunk is used for the speech/silence decision.
    level = float(np.abs(audio_data).mean())

    if level > THRESHOLD:
        silence_duration = 0
        if transcription_in_progress:
            print('Audio detected! Transcribing...')
            queued = True
        else:
            transcription_in_progress = True
            print('Audio detected! Transcribing...')
            # Push buffered frames to disk so the transcriber reads the audio
            # captured so far rather than a stale or partial file.
            output_file.flush()
            threading.Thread(target=transcription).start()
        return

    # A level exactly equal to THRESHOLD is treated as silence.
    silence_duration += BLOCKSIZE / float(SAMPLE_RATE)
    if silence_duration < SILENT_THRESHOLD:
        return

    if queued:
        output_file.flush()
        transcription()
        queued = False

    # Long silence: discard the current recording and start a new one.
    silence_duration = 0
    output_file.close()
    output_file = sf.SoundFile(
        'database/recording.wav', mode='w', samplerate=SAMPLE_RATE, channels=CHANNELS)


def live_listen():
    """Open the input stream and call ``listen`` in an endless loop."""
    with sd.InputStream(channels=CHANNELS, blocksize=BLOCKSIZE,
                        samplerate=SAMPLE_RATE) as stream:
        print("STARTING LIVE TRANSCRIPTION")
        while True:
            listen(model, stream)


if __name__ == "__main__":
    live_listen()