# seamless_stt / h62_client.py
# doctorx's picture
# Upload 12 files
# 40f1ec3 verified
import grpc
import pyaudio
import wave
import io
import threading
import queue
import seamless_m4t_pb2
import seamless_m4t_pb2_grpc
# Set to True to print progress messages while recording and while sending chunks.
debug = False
def record_audio_to_queue(audio_queue, chunk_duration_s, sample_rate=16000, channels=1,
                          frames_per_buffer=8192):
    """
    Capture audio from the sound card and push it to a queue as WAV chunks.

    Opens a 16-bit PCM PyAudio input stream and loops until interrupted. Each
    iteration records ``chunk_duration_s`` seconds of audio, wraps it in an
    in-memory WAV container and puts the resulting ``bytes`` on
    ``audio_queue``. A final ``None`` is always queued on exit (normal or
    not) so the consumer knows that no more audio is coming.

    Args:
        audio_queue: Queue-like object; receives WAV ``bytes`` per chunk and
            a trailing ``None`` sentinel.
        chunk_duration_s: Length of each chunk in seconds.
        sample_rate: Sampling rate in Hz.
        channels: Number of input channels.
        frames_per_buffer: Maximum number of frames requested per read; also
            passed to PyAudio as the stream buffer size.

    Raises:
        ValueError: If the chunk length or ``frames_per_buffer`` is not
            positive (the loop would otherwise spin emitting empty chunks).
    """
    # Count frames directly so the chunk length does not depend on the read size.
    frames_per_chunk = int(sample_rate * chunk_duration_s)

    p = None
    stream = None
    try:
        if frames_per_chunk <= 0 or frames_per_buffer <= 0:
            raise ValueError(
                "chunk_duration_s * sample_rate and frames_per_buffer must be positive"
            )

        # Initialize PyAudio and open the input stream inside the try block so
        # that a failure here still runs the cleanup and queues the sentinel.
        p = pyaudio.PyAudio()
        sample_width = p.get_sample_size(pyaudio.paInt16)
        stream = p.open(format=pyaudio.paInt16,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=frames_per_buffer)
        if debug:
            print("Recording audio... Press Ctrl+C to stop.")

        while True:
            frames = []
            remaining = frames_per_chunk
            # Read until exactly one chunk's worth of frames has been captured.
            while remaining > 0:
                to_read = min(frames_per_buffer, remaining)
                try:
                    data = stream.read(to_read, exception_on_overflow=False)
                except OSError as e:
                    # Best effort: report, then ship whatever was captured so far.
                    print(f"Audio buffer overflow: {e}")
                    break
                frames.append(data)
                remaining -= to_read

            # Write audio data to WAV format in memory
            with io.BytesIO() as wav_buffer:
                with wave.open(wav_buffer, 'wb') as wf:
                    wf.setnchannels(channels)
                    wf.setsampwidth(sample_width)
                    wf.setframerate(sample_rate)
                    wf.writeframes(b"".join(frames))
                audio_queue.put(wav_buffer.getvalue())
    except KeyboardInterrupt:
        print("Stopped recording.")
    finally:
        try:
            if stream is not None:
                if stream.is_active():
                    stream.stop_stream()
                stream.close()
            if p is not None:
                p.terminate()
        finally:
            # Signal the end of recording even if cleanup itself failed.
            audio_queue.put(None)
def send_audio_chunks_to_server(audio_queue, server_address, tgt_lang):
    """
    Send audio chunks from the queue to the gRPC server and print the transcriptions.

    Blocks on ``audio_queue`` and issues one ``SpeechToText`` call per item
    until a ``None`` sentinel is received. Errors for an individual chunk are
    printed and the loop moves on to the next chunk.

    Args:
        audio_queue: Queue-like object yielding audio ``bytes`` per chunk,
            terminated by ``None``.
        server_address: ``host:port`` of the gRPC server.
        tgt_lang: Target language code passed through to the request.
    """
    # Use the channel as a context manager so it is closed when the loop ends
    # or an unexpected exception escapes, instead of leaking the connection.
    with grpc.insecure_channel(server_address) as channel:
        stub = seamless_m4t_pb2_grpc.SeamlessM4TServiceStub(channel)
        chunk_id = 0
        while True:
            audio_data = audio_queue.get()
            if audio_data is None:  # End of recording
                break
            try:
                if debug:
                    print(f"Sending chunk {chunk_id} to server...")
                # Create and send the request
                request = seamless_m4t_pb2.SpeechToTextRequest(audio=audio_data, tgt_lang=tgt_lang)
                response = stub.SpeechToText(request)
                # Print the response
                print(f"{response.text}")
            except grpc.RpcError as e:
                print(f"gRPC Error: {e.code()} - {e.details()}")
            except Exception as e:
                # Deliberate catch-all: one bad chunk must not stop the stream.
                print(f"Unexpected error: {e}")
            finally:
                # Advance for every chunk taken off the queue, including
                # failed ones, so debug numbering tracks queue position.
                chunk_id += 1
if __name__ == "__main__":
    # Parameters
    chunk_duration_s = 1  # Requested length of each audio chunk, in seconds
    server_address = "localhost:9090"  # gRPC server address
    tgt_lang = "eng"  # Target language for transcription

    # Create a queue to share audio chunks between the recorder and the sender
    audio_queue = queue.Queue()

    # Send in a background thread so transcription overlaps with recording.
    sender_thread = threading.Thread(
        target=send_audio_chunks_to_server,
        args=(audio_queue, server_address, tgt_lang),
    )
    sender_thread.start()

    # Record on the main thread: Python delivers KeyboardInterrupt (Ctrl+C)
    # only to the main thread, so the recorder's own Ctrl+C handling can only
    # run here, not inside a worker thread.
    try:
        record_audio_to_queue(audio_queue, chunk_duration_s)
    finally:
        # Make sure the sender is released even if recording failed before it
        # could queue its own end marker; an extra None is harmless.
        audio_queue.put(None)

    # Wait for the sender to drain the remaining chunks
    sender_thread.join()
    print("Recording and transcription completed.")