Spaces:

doctorx
/

seamless_stt

Runtime error

File size: 3,679 Bytes

40f1ec3

import grpc
import pyaudio
import wave
import io
import threading
import queue
import seamless_m4t_pb2
import seamless_m4t_pb2_grpc

# Set to True to print progress messages while recording and sending chunks.
debug = False

def record_audio_to_queue(audio_queue, chunk_duration_s, sample_rate=16000, channels=1):
    """
    Captures audio from the sound card and pushes it to a queue in WAV format.

    Audio is read as 16-bit PCM and cut into chunks of ``chunk_duration_s``
    seconds. Each chunk is encoded as a complete in-memory WAV file and put on
    ``audio_queue`` as ``bytes``. A final ``None`` is always put on the queue
    when the function exits, for any reason, so a consumer blocked on
    ``audio_queue.get()`` can stop.

    Args:
        audio_queue: Queue-like object with a ``put`` method.
        chunk_duration_s: Length of each chunk in seconds; must yield at least
            one frame at ``sample_rate``.
        sample_rate: Sampling rate in Hz.
        channels: Number of input channels.

    Raises:
        ValueError: If ``chunk_duration_s`` does not yield at least one frame.

    Note:
        The loop runs until KeyboardInterrupt. Python raises that exception
        only in the main thread, so Ctrl+C stops this function only when it is
        called from the main thread.
    """
    window = 8192  # frames requested from the device per read

    p = None
    stream = None
    try:
        # Size each chunk by its frame count. Counting reads (as if each read
        # returned 1024 frames) while actually reading `window` frames made
        # every chunk several times longer than requested.
        frames_per_chunk = int(sample_rate * chunk_duration_s)
        if frames_per_chunk <= 0:
            raise ValueError(
                "chunk_duration_s must yield at least one frame, "
                f"got {chunk_duration_s!r} at {sample_rate} Hz"
            )

        # Device setup lives inside the try so that a failure to open the
        # device still releases PortAudio and still queues the sentinel.
        p = pyaudio.PyAudio()
        stream = p.open(format=pyaudio.paInt16,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=window)
        sample_width = p.get_sample_size(pyaudio.paInt16)

        if debug:
            print("Recording audio... Press Ctrl+C to stop.")

        while True:
            frames = []
            remaining = frames_per_chunk

            # Read exactly frames_per_chunk frames, at most `window` at a time
            while remaining > 0:
                to_read = min(window, remaining)
                try:
                    data = stream.read(to_read, exception_on_overflow=False)
                except OSError as e:
                    # Best effort: emit what was captured so far as a short chunk
                    print(f"Audio buffer overflow: {e}")
                    break
                frames.append(data)
                remaining -= to_read

            # Write audio data to WAV format in memory
            with io.BytesIO() as wav_buffer:
                with wave.open(wav_buffer, 'wb') as wf:
                    wf.setnchannels(channels)
                    wf.setsampwidth(sample_width)
                    wf.setframerate(sample_rate)
                    wf.writeframes(b"".join(frames))
                # getvalue() must run after wave closes (header is finalized)
                # and before the BytesIO buffer itself is closed.
                audio_queue.put(wav_buffer.getvalue())

    except KeyboardInterrupt:
        print("Stopped recording.")
    finally:
        if stream is not None:
            if stream.is_active():
                stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()
        audio_queue.put(None)  # Signal the end of recording


def send_audio_chunks_to_server(audio_queue, server_address, tgt_lang):
    """
    Sends audio chunks from the queue to the gRPC server and prints the transcriptions.

    Blocks on ``audio_queue.get()`` and issues one ``SpeechToText`` request per
    item until a ``None`` item is received. Errors for an individual chunk are
    printed and the loop moves on to the next chunk.

    Args:
        audio_queue: Queue-like object with a ``get`` method yielding audio
            payloads, terminated by ``None``.
        server_address: Address of the gRPC server, e.g. ``"localhost:9090"``.
        tgt_lang: Target language code passed through to the request.
    """
    # The context manager closes the channel once the queue is drained;
    # previously the channel was left open when the function returned.
    with grpc.insecure_channel(server_address) as channel:
        stub = seamless_m4t_pb2_grpc.SeamlessM4TServiceStub(channel)

        chunk_id = 0
        while True:
            audio_data = audio_queue.get()
            if audio_data is None:  # End of recording
                break

            if debug:
                print(f"Sending chunk {chunk_id} to server...")

            try:
                # Create and send the request
                request = seamless_m4t_pb2.SpeechToTextRequest(audio=audio_data, tgt_lang=tgt_lang)
                response = stub.SpeechToText(request)

                # Print the response
                print(f"{response.text}")
            except grpc.RpcError as e:
                print(f"gRPC Error: {e.code()} - {e.details()}")
            except Exception as e:
                # Deliberate catch-all: one bad chunk must not stop the stream
                print(f"Unexpected error: {e}")
            finally:
                # Advance the id for failed chunks too, so debug output
                # numbers every chunk taken from the queue exactly once.
                chunk_id += 1


if __name__ == "__main__":
    # Parameters
    chunk_duration_s = 1  # Record in 1-second chunks
    server_address = "localhost:9090"  # gRPC server address
    tgt_lang = "eng"  # Target language for transcription

    # Create a queue to share audio chunks between the recorder and the sender
    audio_queue = queue.Queue()

    # Only the sender runs on a worker thread. The recorder runs here on the
    # main thread because Python raises KeyboardInterrupt (Ctrl+C) only in the
    # main thread; on a worker thread the recorder's Ctrl+C handling would
    # never run and the program could not be stopped cleanly.
    sender_thread = threading.Thread(target=send_audio_chunks_to_server, args=(audio_queue, server_address, tgt_lang))
    sender_thread.start()

    try:
        record_audio_to_queue(audio_queue, chunk_duration_s)
    finally:
        # Make sure the sender is unblocked even if recording failed before it
        # could queue its own end marker; a second None is simply left unread.
        audio_queue.put(None)
        sender_thread.join()

    print("Recording and transcription completed.")