|
import contextlib
import os
import tempfile

import torch
import torchaudio
from flask import Flask, request, jsonify, render_template
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
|
|
|
# Flask application object; the route handlers below register against it.
app = Flask(__name__)

# Pretrained Arabic wav2vec2 CTC checkpoint. The processor and model are loaded
# once at import time so that requests reuse them instead of reloading per call.
model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)

# Directory where uploaded audio is written before it is decoded.
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# exist_ok avoids the check-then-create race of testing os.path.exists first
# (e.g. two worker processes starting at the same time).
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
|
|
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
|
|
# Sample rate the audio is resampled to and that is reported to the processor.
_TARGET_SAMPLE_RATE = 16000


def _load_mono_waveform(path):
    """Load the audio file at *path* as a 1-D NumPy array at 16 kHz."""
    waveform, sample_rate = torchaudio.load(path)
    # torchaudio.load returns (channels, frames) by default. Average the
    # channels so stereo input becomes a single utterance; a 2-D array passed
    # to the processor would otherwise be interpreted as a batch of clips.
    waveform = waveform.mean(dim=0)
    if sample_rate != _TARGET_SAMPLE_RATE:
        resample = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=_TARGET_SAMPLE_RATE
        )
        waveform = resample(waveform)
    return waveform.numpy()


@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe the audio file uploaded in the ``file`` form field.

    Returns:
        A JSON response ``{'transcription': <text>}`` on success;
        ``{'error': ...}`` with status 400 when no file was supplied, or
        with status 500 when the upload could not be stored, decoded or
        transcribed.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    upload = request.files['file']
    if not upload.filename:
        return jsonify({'error': 'No selected file'}), 400

    # Only the extension of the client-supplied name is reused (it can help
    # the audio backend pick a decoder). The rest of the name is untrusted and
    # must not influence where the file is written.
    suffix = os.path.splitext(os.path.basename(upload.filename))[1]

    file_path = None
    try:
        # A unique temp file inside the upload folder prevents both path
        # traversal and clashes between concurrent uploads of the same name.
        fd, file_path = tempfile.mkstemp(
            suffix=suffix, dir=app.config['UPLOAD_FOLDER']
        )
        os.close(fd)
        upload.save(file_path)

        speech = _load_mono_waveform(file_path)
        input_values = processor(
            speech, return_tensors="pt", sampling_rate=_TARGET_SAMPLE_RATE
        ).input_values

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(input_values).logits

        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)
        return jsonify({'transcription': transcription[0]})
    except Exception:
        # Request boundary: log the traceback and answer with the same JSON
        # error shape as the validation failures above.
        app.logger.exception("Failed to transcribe upload %r", upload.filename)
        return jsonify({'error': 'Something went wrong!'}), 500
    finally:
        # The upload is only needed while it is being decoded; remove it so
        # the upload folder does not grow with every request.
        if file_path is not None:
            with contextlib.suppress(OSError):
                os.remove(file_path)
|
|
|
if __name__ == '__main__':
    # debug=True turns on the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Keep it opt-in through
    # the FLASK_DEBUG environment variable instead of always enabled.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    app.run(debug=debug_enabled)
|
|
|
|