spellingdragon
/

whisper-large-v3-handler

Automatic Speech Recognition

hf-asr-leaderboard

Inference Endpoints

Model card Files Files and versions Community

spellingdragon committed on Nov 25, 2023

Commit

6c44ec0

•

1 Parent(s): a95b4c2

Update handler.py

Files changed (1) hide show

handler.py +5 -3

handler.py CHANGED Viewed

@@ -8,16 +8,17 @@ class EndpointHandler():
         device = "cuda:0" if torch.cuda.is_available() else "cpu"
         torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
         model_id = "openai/whisper-large-v3"
-        self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
             model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
         )
-        self.model.to(device)
         processor = AutoProcessor.from_pretrained(model_id)
         self.pipeline = pipeline(
             "automatic-speech-recognition",
-            model=self.model,
             tokenizer=processor.tokenizer,
             feature_extractor=processor.feature_extractor,
             max_new_tokens=128,
@@ -27,6 +28,7 @@ class EndpointHandler():
             torch_dtype=torch_dtype,
             device=device,
         )
     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:

         device = "cuda:0" if torch.cuda.is_available() else "cpu"
         torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
         model_id = "openai/whisper-large-v3"
+        model = AutoModelForSpeechSeq2Seq.from_pretrained(
             model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
         )
+        model.to(device)
         processor = AutoProcessor.from_pretrained(model_id)
         self.pipeline = pipeline(
             "automatic-speech-recognition",
+            model=model,
             tokenizer=processor.tokenizer,
             feature_extractor=processor.feature_extractor,
             max_new_tokens=128,
             torch_dtype=torch_dtype,
             device=device,
         )
+        self.model = model
     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]: