File size: 1,518 Bytes
a4d117e 0a25040 a4d117e b9c964e a4d117e 44e8c42 0a25040 44e8c42 a4d117e 644ca35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
from typing import Dict
from transformers import pipeline
import requests
import jwt
import base64
# Audio sample rate constant; presumably in Hz (TODO confirm). It is not
# referenced anywhere in the visible part of this file.
SAMPLE_RATE = 16000

# PEM-encoded public key used to verify the signature of incoming request
# tokens. Written as adjacent byte-string fragments, one per PEM line, so the
# value is readable; the concatenated bytes are identical to a single literal.
# NOTE(review): confirm this is the deployment's own key and not a published
# sample/test key -- if the matching private key is publicly known, anyone
# can mint tokens that pass verification.
PUBLIC_KEY = (
    b"-----BEGIN PUBLIC KEY-----\n"
    b"MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAu1SU1LfVLPHCozMxH2Mo\n"
    b"4lgOEePzNm0tRgeLezV6ffAt0gunVTLw7onLRnrq0/IzW7yWR7QkrmBL7jTKEn5u\n"
    b"+qKhbwKfBstIs+bMY2Zkp18gnTxKLxoS2tFczGkPLPgizskuemMghRniWaoLcyeh\n"
    b"kd3qqGElvW/VDL5AaWTg0nLVkjRo9z+40RQzuVaE8AkAFmxZzow3x+VJYKdjykkJ\n"
    b"0iT9wCS0DRTXu269V264Vf/3jvredZiKRkgwlL9xNAwxXFg0x/XFw005UWVRIkdg\n"
    b"cKWTjpBP2dPwVZ4WWC+9aGVd+Gyn1o0CLelf4rEjGoXbAAEgAqeGUxrcIlbjXfbc\n"
    b"mwIDAQAB\n"
    b"-----END PUBLIC KEY-----"
)
class EndpointHandler:
    """Token-protected speech-recognition request handler.

    Each call verifies a signed JWT taken from the request payload and then
    forwards the audio to an ``automatic-speech-recognition`` pipeline.
    """

    def __init__(self, path=""):
        """Build the speech-recognition pipeline.

        Args:
            path: Accepted for interface compatibility; not used here. The
                model is always loaded as ``openai/whisper-base``.
        """
        self.pipeline = pipeline(
            "automatic-speech-recognition", model="openai/whisper-base"
        )

    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
        """Authenticate the request and run speech recognition on its audio.

        Args:
            data: Request payload. The keys read (and removed) are:

                * ``"token"`` -- required JWT, verified against
                  ``PUBLIC_KEY`` with the RS512 algorithm only.
                * ``"inputs"`` -- required audio, either raw bytes or a
                  base64-encoded string.
                * ``"parameters"`` -- optional mapping of extra keyword
                  arguments forwarded to the pipeline call.

        Returns:
            Whatever the pipeline call returns for the given audio.

        Raises:
            RuntimeError: If ``"token"`` or ``"inputs"`` is missing or None.
            KeyError: If the verified token carries no ``"jti"`` claim.
            Exception: Any error raised by ``jwt.decode`` for a token that
                fails verification propagates unchanged.
        """
        # Authenticate first so unauthenticated callers learn nothing about
        # the rest of the payload.
        token = data.pop("token", None)
        if token is None:
            raise RuntimeError("missing token")
        # Pinning the algorithm list prevents the token header from choosing
        # a different verification algorithm.
        decoded = jwt.decode(token, PUBLIC_KEY, algorithms=["RS512"])
        print("received input from jti=", decoded["jti"])

        inputs = data.pop("inputs", None)
        if inputs is None:
            # Fail fast with a clear message instead of handing None to the
            # pipeline and surfacing an opaque error from deep inside it.
            raise RuntimeError("missing inputs")
        if isinstance(inputs, str):
            # String payloads are always treated as base64-encoded audio.
            inputs = base64.b64decode(inputs)

        # An explicit ``"parameters": None`` (JSON null) must behave like an
        # absent key; ``**None`` would raise TypeError.
        parameters = data.pop("parameters", None) or {}
        return self.pipeline(inputs, **parameters)
|