# Page residue captured with this file (not code): uploader "deepsync",
# commit message "Update app.py", commit ad59ec4 (verified).
import os
import re
import json
import time
import requests
import gradio as gr
import google.auth
from google.auth.transport.requests import Request
import google.generativeai as genai
# Configure the google.generativeai client with the API key read from the
# GEMINI_API_KEY environment variable (None is passed if the variable is unset).
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
def upload_to_gemini(path, mime_type=None):
    """Upload a local file through ``genai.upload_file`` and return the result.

    Args:
        path: Path of the file to upload.
        mime_type: Optional MIME type forwarded unchanged to ``genai.upload_file``.

    Returns:
        The file object returned by ``genai.upload_file``.
    """
    uploaded = genai.upload_file(path, mime_type=mime_type)
    display_name, uri = uploaded.display_name, uploaded.uri
    print(f"Uploaded file '{display_name}' as: {uri}")
    return uploaded
# Sampling / output parameters handed to the GenerativeModel below.
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=64,
    max_output_tokens=1_048_576,
    response_mime_type="text/plain",
)

# Every listed harm category gets the same BLOCK_NONE threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Shared model instance used by generate_transliteration_gemini_15_pro.
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    generation_config=generation_config,
    safety_settings=safety_settings,
    system_instruction="Act as a language model trained on a specific style of writing that incorporates both Roman and Devanagari script",
)
def generate_transliteration_gemini_15_pro(text):
    """Ask the module-level Gemini ``model`` for a mixed-script transliteration.

    The examples file ``ai_exp_json.txt`` is uploaded on every call and placed
    in the chat history together with the two instruction strings; the input
    sentence is then sent as a follow-up message. The reply is expected to be
    JSON (optionally wrapped in a Markdown code fence) with a ``"transliterate"``
    key, either as a single object or as a list whose first item is used.

    Args:
        text: The sentence to transliterate.

    Returns:
        The ``"transliterate"`` value after ``clean_hindi_transliterated_text``.

    Raises:
        json.JSONDecodeError: If the cleaned reply is not valid JSON.
    """
    examples_file = upload_to_gemini("ai_exp_json.txt", mime_type="text/plain")

    priming_turn = {
        "role": "user",
        "parts": [
            "Given a sentence in Roman written English and a set of pre-defined patterns, transliterate only specific words to Devanagari script while maintaining a desired ratio between Roman and Devanagari words. Your task is to transliterate only a subset of words while maintaining the overall meaning and sentence structure.\n",
            'Based on a provided English sentence and a desired transliteration ratio, use your knowledge of this unique style to select words for transliteration that enhance the overall message and aesthetic. I will provide you with training examples to understand the preferred approach.\nGo through the examples in the file in following JSON format: [{"English": xxx, "Transliteration"}]." and Develop a system that can intelligently choose which English words to transliterate into Devanagari in a sentence, aiming for a specific ratio between the two scripts. With the help of examples in Json format file, design a system that can learn the optimal ratio and transliteration pattern.',
            examples_file,
        ],
    }
    session = model.start_chat(history=[priming_turn])

    request_text = "".join(
        [
            'Given an English sentences: \n```',
            "\n".join([text]),
            '\n```\nTransliterate English sentences into a mix of Roman and Devanagari script, following a predefined pattern or learning from provided examples above without explain anything.\nReturn output in JSON in following format for the list of sentences: {"text": xxx, "transliterate": xxx}',
        ]
    )
    reply = session.send_message(request_text)

    raw_reply = reply.text
    print(raw_reply)

    # Drop Markdown code-fence markers and newlines before parsing the JSON.
    payload = raw_reply
    for noise in ("```json", "```", "\n"):
        payload = payload.replace(noise, "")

    parsed = json.loads(payload)
    if isinstance(parsed, list):
        parsed = parsed[0]
    return clean_hindi_transliterated_text(parsed["transliterate"])
def update_text_from_dictionary(text, dictionary_path="./en_hi.dict", initial_lookup=True):
    """Replace whole whitespace-separated tokens of *text* using a dictionary file.

    Each non-blank line of the dictionary file has three ``|``-separated
    fields: ``initial|incorrect|correct``. With ``initial_lookup`` truthy the
    first field is the lookup key; otherwise the second field is. A token
    matches when it equals the key, optionally followed by one of ``.``, ``?``
    or ``,`` (the punctuation is carried over to the replacement).

    Args:
        text: Input text; it is split on whitespace and re-joined with single
            spaces, so runs of whitespace are collapsed.
        dictionary_path: Path of the dictionary file. A falsy value disables
            the lookup and returns *text* unchanged.
        initial_lookup: Selects which column is used as the lookup key.

    Returns:
        The text with matching tokens replaced by their ``correct`` form.

    Raises:
        OSError: If the dictionary file cannot be opened.
    """
    if not dictionary_path:
        return text

    key_column = 0 if initial_lookup else 1
    replacements = {}
    # The dictionary contains Devanagari text, so never rely on the platform's
    # default encoding when reading it.
    with open(dictionary_path, encoding="utf-8") as f:
        lines = f.read().splitlines()

    for line_no, line in enumerate(lines, start=1):
        if not line.strip():
            # Blank lines (e.g. a trailing empty line) are not entries.
            continue
        fields = line.split("|")
        if len(fields) != 3:
            print(f"Skipping malformed dictionary line {line_no}: {line!r}")
            continue
        key, correct = fields[key_column], fields[2]
        for suffix in ("", ".", "?", ","):
            replacements[key + suffix] = correct + suffix

    print(f"Original [{initial_lookup}]: ", text)
    new_text = " ".join(replacements.get(token, token) for token in text.split())
    print(f"New [{initial_lookup}]: ", new_text)
    return new_text
def get_google_token():
    """Return a freshly refreshed Google access token.

    Credentials are built from the JSON document stored in the
    ``GCP_FINETUNE_KEY`` environment variable, requested with the
    cloud-platform and generative-language tuning scopes, and refreshed once
    before the token is read.

    Returns:
        The ``token`` attribute of the refreshed credentials.
    """
    key_info = json.loads(os.environ.get('GCP_FINETUNE_KEY'))
    requested_scopes = [
        "https://www.googleapis.com/auth/cloud-platform",
        "https://www.googleapis.com/auth/generative-language.tuning",
    ]
    # The second element of the returned pair (the project) is not needed here.
    credentials, _ = google.auth.load_credentials_from_dict(
        key_info,
        scopes=requested_scopes,
    )
    credentials.refresh(Request())
    return credentials.token
def transliterate_first_word(text):
    """Transliterate only the first word of *text* via inputtools.google.com.

    The first whitespace-separated word is sent to the
    ``https://inputtools.google.com/request`` endpoint with the
    ``hi-t-i0-und`` input-tool code, and the first returned candidate replaces
    it. The remainder of the text is kept as-is.

    Args:
        text: Input text.

    Returns:
        *text* unchanged when it is empty/whitespace-only or when its first
        word is not purely alphanumeric; otherwise the transliterated first
        word followed by the rest of the text (no trailing space is added when
        there is no rest).

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP response.
    """
    parts = text.split(maxsplit=1)
    if not parts:
        # Empty or whitespace-only input: nothing to transliterate.
        return text

    first_word = parts[0]
    rest = parts[1] if len(parts) > 1 else ""
    if not first_word.isalnum():
        return text

    params = {
        "text": first_word,
        "num": 1,
        "itc": "hi-t-i0-und",
        "cp": 0,
        "cs": 1,
        "ie": "utf-8",
        "app": "demopage",
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(
        "https://inputtools.google.com/request", params=params, timeout=10
    )
    response.raise_for_status()

    # NOTE(review): assumes the reply is shaped like [status, [[word, [candidates...]]]]
    # — inferred from the indexing below; confirm against the service.
    candidates = response.json()[1][0][1]
    first_word_transliterated = candidates[0]
    if not rest:
        return first_word_transliterated
    return f"{first_word_transliterated} {rest}"
def clean(result):
    """Extract and tidy the transliteration from a chat-completions response.

    Parenthesised and bracketed spans are removed, quote/backtick characters
    are dropped, and when the reply spans several lines only the last
    non-empty-trailing line is kept before the text is passed to
    ``clean_hindi_transliterated_text``.

    Args:
        result: Decoded JSON response; the text is read from
            ``result["choices"][0]["message"]["content"]``.

    Returns:
        The cleaned transliterated text.
    """
    text = result["choices"][0]["message"]["content"]
    text = re.sub(r"\(.*?\)|\[.*?\]", "", text)
    text = text.strip("'").replace('"', "").replace('`', "")
    # Strip surrounding newlines BEFORE splitting: otherwise a reply ending in
    # "\n" makes the last split segment empty and the whole result is lost.
    text = text.strip("\n")
    if "\n" in text:
        text = text.split("\n")[-1]
    return clean_hindi_transliterated_text(text)
def clean_hindi_transliterated_text(text):
    """Normalise model output and apply the final dictionary corrections.

    A fixed, ordered list of substring replacements is applied (backticks,
    the ``output:`` marker, three Devanagari character substitutions, braces,
    the ``'text'`` label and colons), surrounding whitespace and quotes are
    stripped, and the result is passed through ``update_text_from_dictionary``
    with ``initial_lookup=False``.

    Args:
        text: Raw transliterated text.

    Returns:
        The cleaned and dictionary-corrected text.
    """
    # Order matters: "output:" must be removed before the bare ":" rule runs.
    ordered_replacements = (
        ('`', ''),
        ("output:", ""),
        ('ऑ', 'औ'),
        ('ॉ', 'ौ'),
        ('ॅ', 'े'),
        ("{", ""),
        ("}", ""),
        ("'text'", ""),
        (":", ""),
    )
    cleaned = text
    for old, new in ordered_replacements:
        cleaned = cleaned.replace(old, new)

    cleaned = cleaned.strip()
    cleaned = cleaned.strip("'")
    cleaned = cleaned.strip('"')
    return update_text_from_dictionary(cleaned, initial_lookup=False)
def dubpro_english_transliteration(text, call_gpt):
    """Transliterate English text to Hindi script.

    Args:
        text: English input text.
        call_gpt: When truthy, the text is first passed through
            ``update_text_from_dictionary`` and then sent to the OpenAI chat
            completions endpoint (model ``gpt-4o-2024-05-13``); otherwise the
            Gemini path ``generate_transliteration_gemini_15_pro`` is used.

    Returns:
        The cleaned transliterated text.

    Raises:
        RuntimeError: If the OpenAI endpoint keeps answering with a non-200
            status after all retry attempts.
        requests.RequestException: On network failure or timeout.
    """
    if not call_gpt:
        return generate_transliteration_gemini_15_pro(text)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
    }
    text = update_text_from_dictionary(text, initial_lookup=True)
    prompt = f"Given the English text, transliterate it to Hindi, without translation. Return only the transliterated text, without any instruction or messages. Text: `{text}`\nOutput: "
    payload = {
        "model": "gpt-4o-2024-05-13",
        "messages": [{"role": "user", "content": prompt}],
    }

    # The previous `while resp is None` loop could never run twice, so a failed
    # response was parsed as if it had succeeded. Retry a bounded number of
    # times and fail loudly instead.
    max_attempts = 5
    last_status = None
    for attempt in range(1, max_attempts + 1):
        resp = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,
        )
        if resp.status_code == 200:
            return clean(resp.json())
        last_status = resp.status_code
        print(resp.text)
        if attempt < max_attempts:
            time.sleep(0.5)

    raise RuntimeError(
        f"OpenAI transliteration request failed after {max_attempts} attempts "
        f"(last HTTP status {last_status})"
    )
# API_URL = os.environ.get("GEMINI_FINETUNED_HINDI_ENG_API")
# BEARER_TOKEN = get_google_token()
# headers = {
# "Authorization": f"Bearer {BEARER_TOKEN}",
# "Content-Type": "application/json",
# }
# payload = {
# "contents": [
# {
# "parts": [{"text": f"input: {text}"}],
# "role": "user",
# }
# ],
# "generationConfig": {
# "maxOutputTokens": 8192,
# "temperature": 0.85,
# },
# "safetySettings": [
# {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
# {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
# {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
# {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
# ],
# }
# result = requests.post(
# url=API_URL,
# headers=headers,
# json=payload
# )
# response = result.json()
# response_content = response['candidates'][0]['content']['parts'][0]['text'].replace("output:", "").strip().replace("'text':", "").replace("{", "").replace("}", "").strip().strip("'").strip('"')
# # response_content = transliterate_first_word(response_content)
# return response_content
def generate_rephrases_gemini(text, language, problem):
    """Rephrase *text* so that it takes slightly more or less time to speak.

    The request is posted to the URL stored in the ``GEMINI_REPHRASER_API``
    environment variable, authorised with a token from ``get_google_token``.

    Args:
        text: The sentence to rephrase.
        language: ``"English"`` or ``"Hindi"``; selects the instruction prompt.
        problem: ``"Gap"`` asks for a version that takes more time to speak;
            any other value asks for one that takes less.

    Returns:
        A ``(rephrased_text, word_count_summary)`` tuple. Hyphens are removed
        from every output line and each line is stripped.

    Raises:
        ValueError: If *language* is not ``"English"`` or ``"Hindi"``.
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP response.
    """
    speak = "more" if problem == "Gap" else "less"

    # Validate up front: previously an unknown language left `prompt` unbound
    # and failed later with an UnboundLocalError, after a token had already
    # been fetched.
    if language == "English":
        prompt = f"You are an English and Hindi language expert, please rephrase a sentence that has been translated from Hindi to English so that it takes little {speak} time to speak."
    elif language == "Hindi":
        prompt = f"You are a hindi language expert please rephrase the below line without summary so that it takes little {speak} time to speak in hinglish manner."
    else:
        raise ValueError(
            f"Unsupported language: {language!r} (expected 'English' or 'Hindi')"
        )

    api_url = os.environ.get("GEMINI_REPHRASER_API")
    bearer_token = get_google_token()
    headers = {
        "Authorization": f"Bearer {bearer_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [
            {
                "parts": [
                    {"text": prompt},
                    {"text": f"input: {text}"},
                    {"text": "output: "},
                ],
                "role": "user",
            }
        ],
        "generationConfig": {
            "maxOutputTokens": 8192,
            "temperature": 0.85,
            "candidateCount": 1,
        },
        "safetySettings": [
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
        ],
    }

    # A timeout keeps the UI from hanging on a stalled connection, and the
    # status check surfaces HTTP errors instead of a KeyError further down.
    result = requests.post(url=api_url, headers=headers, json=payload, timeout=60)
    result.raise_for_status()
    response = result.json()

    output_text = response["candidates"][0]["content"]["parts"][0]["text"]
    cleaned_lines = [line.replace("-", "").strip() for line in output_text.split("\n")]
    texts = "\n".join(cleaned_lines)
    wc = f"Original Word Count: {len(text.split())}\nRephrased Word Count: {len(texts.split())}"
    return texts, wc
# Gradio UI: one tab per tool, each wired to its handler function.
# NOTE(review): the original indentation was lost; the nesting of rows/columns
# below is reconstructed from the statement order — confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Translator Assistance Tools")

    with gr.Tab("Transliteration"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input text", info="Please enter English text.")
                full_transliteration = gr.Checkbox(label="Full transliteration", value=True)
            output_text = gr.Textbox(label="Output text")
        transliterate = gr.Button("Submit")
        # The checkbox value is passed as `call_gpt` to the handler.
        transliterate.click(
            fn=dubpro_english_transliteration,
            inputs=[input_text, full_transliteration],
            outputs=output_text,
        )

    with gr.Tab("Rephraser Tool"):
        with gr.Row():
            rephrase_text = gr.Textbox(label="Input text", info="Please enter text.")
            language = gr.Dropdown(choices=["English", "Hindi"], value="Hindi")
            solving_for = gr.Dropdown(
                choices=["Gap", "Overflow"], value="Overflow", label="Solving for:"
            )
        with gr.Row():
            word_count = gr.Textbox(label="Word count")
            rephrased_text = gr.Textbox(label="Output text")
        rephrase = gr.Button("Submit")
        rephrase.click(
            fn=generate_rephrases_gemini,
            inputs=[rephrase_text, language, solving_for],
            outputs=[rephrased_text, word_count],
        )

# Basic-auth credentials come from the USERNAME / PASSWORD environment variables.
login_credentials = (os.environ.get("USERNAME"), os.environ.get("PASSWORD"))
demo.launch(auth=login_credentials)