import io
import queue
import threading
import wave
from typing import List, Optional

import grpc
import pyaudio

import seamless_m4t_pb2
import seamless_m4t_pb2_grpc

debug = False


def _read_chunk(stream, total_frames: int, window: int,
                stop_event: threading.Event) -> List[bytes]:
    """Read up to *total_frames* frames from *stream* in pieces of *window*.

    Returns the raw buffers read so far. The list is shorter than requested
    (possibly empty) when a read fails or *stop_event* is set mid-chunk.
    """
    frames = []
    remaining = total_frames
    while remaining > 0 and not stop_event.is_set():
        count = min(window, remaining)
        try:
            # Overflowed input is dropped rather than raised.
            data = stream.read(count, exception_on_overflow=False)
        except OSError as e:
            print(f"Audio buffer overflow: {e}")
            break
        frames.append(data)
        remaining -= count
    return frames


def _encode_wav(frames: List[bytes], channels: int, sample_width: int,
                sample_rate: int) -> bytes:
    """Wrap raw PCM buffers in an in-memory WAV container and return its bytes."""
    with io.BytesIO() as wav_buffer:
        with wave.open(wav_buffer, "wb") as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(sample_rate)
            wf.writeframes(b"".join(frames))
        # Read the buffer only after the wave writer is closed, so the
        # header sizes have been finalized.
        return wav_buffer.getvalue()


def record_audio_to_queue(audio_queue: queue.Queue, chunk_duration_s: float,
                          sample_rate: int = 16000, channels: int = 1,
                          stop_event: Optional[threading.Event] = None) -> None:
    """Capture audio from the sound card and push WAV-encoded chunks to a queue.

    Each queued item is the bytes of a complete 16-bit PCM WAV file holding
    about ``chunk_duration_s`` seconds of audio. A final ``None`` is always
    queued when recording ends (including on failure) so the consumer can
    terminate.

    Args:
        audio_queue: Queue receiving WAV ``bytes`` items and the ``None``
            end-of-stream sentinel.
        chunk_duration_s: Length of each chunk in seconds; must yield at
            least one frame at ``sample_rate``.
        sample_rate: Capture rate in Hz.
        channels: Number of input channels.
        stop_event: Optional event that stops recording when set. This is
            needed when the function runs in a worker thread, because
            ``KeyboardInterrupt`` is only raised in the main thread.

    Raises:
        ValueError: If ``chunk_duration_s * sample_rate`` is less than one frame.
    """
    if stop_event is None:
        stop_event = threading.Event()

    window = 8192  # frames requested per read / PortAudio buffer size
    p = None
    stream = None
    try:
        # Count frames directly so the chunk length does not depend on the
        # read window size.
        frames_per_chunk = int(sample_rate * chunk_duration_s)
        if frames_per_chunk <= 0:
            raise ValueError("chunk_duration_s must cover at least one frame")

        p = pyaudio.PyAudio()
        sample_width = p.get_sample_size(pyaudio.paInt16)
        stream = p.open(format=pyaudio.paInt16,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=window)

        if debug:
            print("Recording audio... Press Ctrl+C to stop.")

        while not stop_event.is_set():
            frames = _read_chunk(stream, frames_per_chunk, window, stop_event)
            if stop_event.is_set():
                break  # drop the partial chunk captured while stopping
            if not frames:
                continue  # nothing captured; do not queue a header-only WAV
            audio_queue.put(
                _encode_wav(frames, channels, sample_width, sample_rate))
    except KeyboardInterrupt:
        # Only reachable when this function runs on the main thread.
        stop_event.set()
    finally:
        if stop_event.is_set():
            print("Stopped recording.")
        if stream is not None:
            if stream.is_active():
                stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
        audio_queue.put(None)  # Signal the end of recording


def send_audio_chunks_to_server(audio_queue: queue.Queue, server_address: str,
                                tgt_lang: str) -> None:
    """Send queued audio chunks to the gRPC server and print each transcription.

    Consumes items from ``audio_queue`` until the ``None`` sentinel arrives.
    A failure on one chunk is reported and the loop continues with the next.

    Args:
        audio_queue: Queue yielding WAV ``bytes`` items, ended by ``None``.
        server_address: ``host:port`` of the gRPC server.
        tgt_lang: Target language code passed through to the request.
    """
    # The context manager closes the channel once the queue is drained.
    with grpc.insecure_channel(server_address) as channel:
        stub = seamless_m4t_pb2_grpc.SeamlessM4TServiceStub(channel)

        # iter(callable, sentinel) stops when the recorder queues None.
        for chunk_id, audio_data in enumerate(iter(audio_queue.get, None)):
            try:
                if debug:
                    print(f"Sending chunk {chunk_id} to server...")

                request = seamless_m4t_pb2.SpeechToTextRequest(
                    audio=audio_data, tgt_lang=tgt_lang)
                response = stub.SpeechToText(request)

                print(response.text)
            except grpc.RpcError as e:
                print(f"gRPC Error: {e.code()} - {e.details()}")
            except Exception as e:
                # Thread boundary: report and keep consuming so the recorder's
                # queue does not back up.
                print(f"Unexpected error: {e}")


if __name__ == "__main__":
    # Parameters
    chunk_duration_s = 1  # Length of each recorded chunk, in seconds
    server_address = "localhost:9090"  # gRPC server address
    tgt_lang = "eng"  # Target language for transcription

    # Queue shared between the recorder (producer) and sender (consumer)
    audio_queue = queue.Queue()

    # Set from the main thread on Ctrl+C; worker threads never receive
    # KeyboardInterrupt themselves.
    stop_event = threading.Event()

    recorder_thread = threading.Thread(
        target=record_audio_to_queue,
        args=(audio_queue, chunk_duration_s),
        kwargs={"stop_event": stop_event})
    sender_thread = threading.Thread(
        target=send_audio_chunks_to_server,
        args=(audio_queue, server_address, tgt_lang))

    recorder_thread.start()
    sender_thread.start()

    try:
        # Join with a timeout so Ctrl+C is handled promptly on every platform.
        while recorder_thread.is_alive():
            recorder_thread.join(timeout=0.5)
    except KeyboardInterrupt:
        stop_event.set()

    # Wait for both threads to finish
    recorder_thread.join()
    sender_thread.join()

    print("Recording and transcription completed.")