import gradio as gr
from openai import OpenAI
import re
from konlpy.tag import Kkma
from TextUtil.digit2text import digit2txt, NNGdigit2txt, CSign2txt
# Tokenizer used by process_txt. Alternatives, in priority order: a Hangul
# run, a Latin run (dots allowed), a digit run (may embed ',' and '.'), one
# currency symbol, or a whitespace run.
_TOKEN_PATTERN = re.compile(r'([가-힣]+)|([a-zA-Z.]+)|(\d[\d,.]*)|(\$|€|£|¥|₩)|(\s+)')

# Currency words that keep the plain digit2txt reading even when the tagger
# labels them NNG. They mirror the symbols accepted by _TOKEN_PATTERN
# ('유로' is the euro; the previous entry '유료' was a misspelling).
_CURRENCY_WORDS = frozenset({'달러', '유로', '파운드', '엔', '원'})


def process_txt(text):
    """Rewrite digits and currency symbols in *text* as words.

    The text is scanned with ``_TOKEN_PATTERN``. Hangul, Latin and whitespace
    tokens are copied through unchanged. Currency symbols are passed to
    ``CSign2txt``. Digit runs have their thousands separators (',') removed
    and are converted with ``NNGdigit2txt`` when the first morpheme that
    follows the number is tagged ``NNG`` by Kkma and is not a currency word;
    otherwise ``digit2txt`` is used (presumably native-Korean counting vs.
    the plain numeric reading -- confirm against TextUtil.digit2text).

    Characters that match none of the token alternatives (for example '!'
    or '?') are not copied to the output.

    Args:
        text: The input string to normalize.

    Returns:
        The normalized string.
    """
    # The tagger is created lazily so that text without numbers never pays
    # for (or depends on) the Kkma start-up.
    kkma = None
    pieces = []

    for match in _TOKEN_PATTERN.finditer(text):
        korean, latin, number, currency, spaces = match.groups()

        if korean:  # Korean part
            pieces.append(korean)
        elif latin:  # Latin part
            pieces.append(latin)
        elif number:  # Number part
            digits = number.replace(',', '')
            remainder = text[match.end(3):]

            # A number at the very end of the text (or followed only by
            # whitespace) has no following word; skip the tagger instead of
            # indexing into an empty result.
            tagged = []
            if remainder.strip():
                if kkma is None:
                    kkma = Kkma()
                tagged = kkma.pos(remainder)

            # NNG case: a common noun that is not a currency word.
            if (
                tagged
                and tagged[0][1] == "NNG"
                and tagged[0][0] not in _CURRENCY_WORDS
            ):
                pieces.append(NNGdigit2txt(digits))
            else:
                pieces.append(digit2txt(digits))
        elif currency:  # Currency symbol part
            pieces.append(CSign2txt(currency))
        elif spaces:  # Space part
            pieces.append(spaces)

    return "".join(pieces)
def generate_audio(api_key, file, model, voice):
    """Convert an uploaded text file to speech and return the MP3 path.

    Args:
        api_key: OpenAI API key entered by the user.
        file: Raw bytes of the uploaded text file (UTF-8 expected).
        model: TTS model name passed to the OpenAI API.
        voice: Voice name passed to the OpenAI API.

    Returns:
        Path of the MP3 file that was written.

    Raises:
        gr.Error: If an input is missing, the file is not valid UTF-8, or
            nothing is left to synthesize after text processing.
    """
    import tempfile  # local import: only this function needs it

    # Validate inputs up front so the user sees a clear message instead of an
    # AttributeError or an opaque API failure.
    if not api_key or not api_key.strip():
        raise gr.Error("Please enter your OpenAI API key.")
    if file is None:
        raise gr.Error("Please upload a text file.")
    if not model or not voice:
        raise gr.Error("Please select both a model and a voice.")

    # Initialize the OpenAI client with the user-supplied API key
    # (surrounding whitespace from copy/paste is removed).
    client = OpenAI(api_key=api_key.strip())

    # Decode the uploaded bytes.
    try:
        text = file.decode("utf-8")
    except UnicodeDecodeError as err:
        raise gr.Error("The uploaded file must be UTF-8 encoded text.") from err

    # Normalize digits and currency symbols.
    text = process_txt(text)
    print(text)
    if not text.strip():
        raise gr.Error("The uploaded file contains no text to convert.")

    # TTS request
    response = client.audio.speech.create(
        model=model,
        voice=voice,
        input=text
    )

    # Save as MP3. A unique temporary file is used so that concurrent
    # requests do not overwrite each other's output. The file is created
    # only after the request succeeded, and is kept (delete=False) because
    # the caller needs the path after this function returns.
    with tempfile.NamedTemporaryFile(
        prefix="generated_audio_", suffix=".mp3", delete=False
    ) as tmp:
        speech_file_path = tmp.name
    response.stream_to_file(speech_file_path)
    return speech_file_path
# Gradio interface definition
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        # Mask the key so the secret is not shown in clear text on screen.
        gr.Text(label="Enter OpenAI API Key", type="password"),
        gr.File(label="Upload Text File", type="binary"),
        # Defaults keep the handler from receiving None when the user does
        # not touch the radio buttons.
        gr.Radio(choices=["tts-1", "tts-1-hd"], value="tts-1", label="Model"),
        gr.Radio(
            choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
            value="alloy",
            label="Voice",
        ),
    ],
    outputs=gr.File(label="Download MP3 File"),
    title="Text-to-Speech Converter (Korean Digit2Text)",
    # Adjacent literals joined with explicit "\n": a plain quoted string
    # cannot span several physical lines.
    description=(
        "Upload a text file and enter your OpenAI API key to convert it into speech using OpenAI's Text-to-Speech models.\n"
        "*해당 서비스는 한국어에 맞춤화되어 있습니다.\n"
        "*한국어 숫자 발음 변환을 통해 더 정확한 숫자 TTS를 가능하게 합니다.\n"
        "*예시: 50,000$ -> 오만달러, 5가지 -> 다섯가지, 99권 -> 아흔아홉권"
    ),
)

if __name__ == "__main__":
    iface.launch()