Spaces:
Runtime error
Runtime error
import grpc | |
import pyaudio | |
import wave | |
import io | |
import threading | |
import queue | |
import seamless_m4t_pb2 | |
import seamless_m4t_pb2_grpc | |
# Verbose flag: when True, the recorder and the sender print progress messages.
debug = False | |
def record_audio_to_queue(audio_queue, chunk_duration_s, sample_rate=16000, channels=1):
    """
    Captures audio from the sound card and pushes it to a queue in WAV format.

    Records continuously, cutting the input into chunks of ``chunk_duration_s``
    seconds. Each chunk is encoded as a complete in-memory WAV file (16-bit
    samples) and put on ``audio_queue`` as ``bytes``. ``None`` is always put on
    the queue last, even on failure, so the consumer knows no more chunks will
    arrive.

    Args:
        audio_queue: Queue that receives the WAV chunks, followed by ``None``.
        chunk_duration_s: Length of each chunk in seconds.
        sample_rate: Sampling rate in Hz.
        channels: Number of input channels.

    Raises:
        ValueError: If ``chunk_duration_s`` and ``sample_rate`` yield a chunk
            of zero frames (the loop would otherwise spin pushing empty WAVs).

    Note:
        Python raises KeyboardInterrupt only in the main thread, so the Ctrl+C
        handling below takes effect only when this function runs there.
    """
    window = 8192  # Maximum number of frames requested per read
    sample_format = pyaudio.paInt16
    # Size the chunk in frames so its length does not depend on the read size.
    frames_per_chunk = int(sample_rate * chunk_duration_s)

    p = None
    stream = None
    try:
        if frames_per_chunk <= 0:
            raise ValueError(
                "chunk_duration_s * sample_rate must amount to at least one frame"
            )

        # Initialize PyAudio and open the input stream
        p = pyaudio.PyAudio()
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=window)
        sample_width = p.get_sample_size(sample_format)

        if debug:
            print("Recording audio... Press Ctrl+C to stop.")

        while True:
            frames = []
            remaining = frames_per_chunk
            # Read until exactly one chunk's worth of frames has been captured
            while remaining > 0:
                to_read = min(window, remaining)
                try:
                    data = stream.read(to_read, exception_on_overflow=False)
                except OSError as e:
                    print(f"Audio buffer overflow: {e}")
                    break  # Ship whatever was captured for this chunk
                frames.append(data)
                remaining -= to_read

            # Write audio data to WAV format in memory. The WAV writer must be
            # closed before getvalue() so the header carries the final sizes.
            with io.BytesIO() as wav_buffer:
                with wave.open(wav_buffer, 'wb') as wf:
                    wf.setnchannels(channels)
                    wf.setsampwidth(sample_width)
                    wf.setframerate(sample_rate)
                    wf.writeframes(b"".join(frames))
                audio_queue.put(wav_buffer.getvalue())
    except KeyboardInterrupt:
        print("Stopped recording.")
    finally:
        # Release the audio device even if opening or recording failed
        if stream is not None:
            if stream.is_active():
                stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
        audio_queue.put(None)  # Signal the end of recording
def send_audio_chunks_to_server(audio_queue, server_address, tgt_lang):
    """
    Sends audio chunks from the queue to the gRPC server and prints the transcriptions.

    Blocks on ``audio_queue`` and issues one ``SpeechToText`` call per chunk
    until ``None`` is dequeued. A failed call is reported on stdout and the
    loop moves on to the next chunk.

    Args:
        audio_queue: Queue yielding audio chunks, terminated by ``None``.
        server_address: ``host:port`` of the gRPC server.
        tgt_lang: Target language code passed along with every request.
    """
    # Connect to the gRPC server; the context manager closes the channel on
    # exit so its connection is released once the queue is drained.
    with grpc.insecure_channel(server_address) as channel:
        stub = seamless_m4t_pb2_grpc.SeamlessM4TServiceStub(channel)

        # iter(callable, sentinel) keeps calling get() until it returns None
        # (end of recording). enumerate numbers every dequeued chunk, so a
        # failed chunk does not reuse its id for the next one.
        for chunk_id, audio_data in enumerate(iter(audio_queue.get, None)):
            try:
                if debug:
                    print(f"Sending chunk {chunk_id} to server...")

                # Create and send the request
                request = seamless_m4t_pb2.SpeechToTextRequest(audio=audio_data, tgt_lang=tgt_lang)
                response = stub.SpeechToText(request)

                # Print the response
                print(response.text)
            except grpc.RpcError as e:
                print(f"gRPC Error: {e.code()} - {e.details()}")
            except Exception as e:
                # Boundary of the worker loop: report and keep consuming so
                # one bad chunk does not stop the transcription.
                print(f"Unexpected error: {e}")
def main():
    """Record from the sound card and print server transcriptions until Ctrl+C."""
    # Parameters
    chunk_duration_s = 1  # Record in 1-second chunks
    server_address = "localhost:9090"  # gRPC server address
    tgt_lang = "eng"  # Target language for transcription

    # Create a queue to share audio chunks between the recorder and the sender
    audio_queue = queue.Queue()

    # Only the sender runs on a worker thread
    sender_thread = threading.Thread(
        target=send_audio_chunks_to_server,
        args=(audio_queue, server_address, tgt_lang),
    )
    sender_thread.start()

    try:
        # Record on the main thread: Python delivers KeyboardInterrupt only
        # there, so this is what lets Ctrl+C reach the recorder's handler and
        # stop the program instead of leaving worker threads running.
        record_audio_to_queue(audio_queue, chunk_duration_s)
    finally:
        # Make sure the sender is released even if the recorder raised before
        # queueing its own end marker; the sender stops at the first None, so
        # an extra one is simply never read.
        audio_queue.put(None)
        sender_thread.join()

    print("Recording and transcription completed.")


if __name__ == "__main__":
    main()