# Spaces: Sleeping
import numpy as np
import torch
from flask import Flask, request, jsonify
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# Sampling rate (Hz) declared to the processor for every incoming clip.
SAMPLE_RATE = 16000

# Single checkpoint identifier shared by the processor and the CTC model so
# the two can never drift apart.
_MODEL_ID = "oyqiz/uzbek_stt"

# WSGI application object that the HTTP handlers in this module belong to.
app = Flask(__name__)

# Both objects are loaded once at import time and reused across requests.
processor = Wav2Vec2Processor.from_pretrained(_MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(_MODEL_ID)
# NOTE(review): the handler was not registered with the app, so no URL ever
# reached it. "/" is inferred from the handler's name and greeting payload --
# confirm against existing clients.
@app.route("/")
def index():
    """Return a small JSON greeting.

    Returns:
        A Flask JSON response with a single ``"message"`` key.
    """
    return jsonify({"message": "Welcome to whisper uz!"})
# NOTE(review): the handler was not registered with the app. The path and the
# POST method are inferred from the handler name and its use of the raw
# request body -- confirm against existing clients.
@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Transcribe the raw audio carried in the request body.

    The body is read as headerless ``np.int16`` samples and handed to the
    processor with ``SAMPLE_RATE`` as the sampling rate.
    NOTE(review): presumably mono PCM already recorded at ``SAMPLE_RATE`` --
    nothing here resamples or de-interleaves; confirm with the client.

    Returns:
        The decoded transcription string on success, or a
        ``(JSON error, 400)`` pair when the body is empty or is not a whole
        number of 16-bit samples.
    """
    raw_bytes = request.data

    # np.frombuffer raises ValueError on a buffer that is not a multiple of
    # the item size, and an empty body would feed the model zero samples;
    # reject both up front as a client error instead of an unhandled 500.
    sample_width = np.dtype(np.int16).itemsize
    if not raw_bytes or len(raw_bytes) % sample_width:
        return jsonify(
            {"error": "Request body must be non-empty 16-bit PCM audio."}
        ), 400

    pcm_samples = np.frombuffer(raw_bytes, dtype=np.int16)
    # Scale the integer samples by the int16 maximum to get floats near [-1, 1].
    waveform = pcm_samples / np.iinfo(np.int16).max

    inputs = processor(waveform, sampling_rate=SAMPLE_RATE, return_tensors="pt")

    # Some Wav2Vec2 feature extractors are configured not to emit an
    # attention mask; .get() passes None in that case instead of raising
    # AttributeError on a missing key.
    attention_mask = inputs.get("attention_mask")

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=attention_mask).logits

    # Greedy CTC decoding: take the most likely token at every frame, then
    # let the processor turn the ids of the single clip into text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.decode(predicted_ids[0])
    return transcription
# Start Flask's built-in server only when this file is run as a script.
if __name__ == '__main__':
    bind_host = '0.0.0.0'  # listen on every network interface
    listen_port = 7860
    app.run(host=bind_host, port=listen_port)