"""Gradio demo that transcribes audio with a speech-recognition pipeline.

Loads an automatic-speech-recognition pipeline from a local checkpoint,
forces Japanese transcription prompts on the decoder, and serves a Gradio
interface whose description text is taken from the project's README.
"""
from transformers import pipeline
import gradio as gr
import spaces

# Number of leading README lines dropped before the text is used as the UI
# description. NOTE(review): presumably this skips the README's metadata
# header -- confirm against the actual README.md.
README_SKIP_LINES = 11

transcribe = pipeline(
    task="automatic-speech-recognition",
    model="ckpt_base/checkpoint-1308",
    tokenizer="ckpt_base",
    chunk_length_s=30,
    device=0,
)
# Pin the decoder prompt so generation always uses Japanese + "transcribe".
transcribe.model.config.forced_decoder_ids = (
    transcribe.tokenizer.get_decoder_prompt_ids(language="ja", task="transcribe")
)


@spaces.GPU
def main(audio_path):
    """Run the pipeline on one audio file and return its text output.

    Args:
        audio_path: Path of the audio file, as supplied by the Gradio
            ``Audio`` input (configured with ``type='filepath'``).

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    return transcribe(audio_path)["text"]


# Read the README explicitly as UTF-8 so non-ASCII text does not depend on
# the platform's default locale encoding.
with open('./README.md', encoding="utf-8") as f:
    md = f.readlines()
md = md[README_SKIP_LINES:]
# readlines() keeps each line's trailing newline, so joining with "\n" leaves
# a blank line between consecutive README lines. Kept as-is on purpose,
# because it affects how the Markdown description is rendered.
md = "\n".join(md)

# Keep `iface` bound to the Interface itself (not to launch()'s return value)
# so the object remains usable after launching.
iface = gr.Interface(
    fn=main,
    inputs=[gr.Audio(type='filepath', sources=['microphone', 'upload'])],
    description=md,
    outputs="text",
    title="CoCoCap-beta 日本語声質キャプショニンング with CocoNut Corpus",
)
iface.launch(share=True)