Update app.py
app.py
CHANGED
@@ -3,6 +3,7 @@ import os
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
+import torch.nn.functional as F
 
 # Load the Hugging Face token from the Space secrets
 token = os.environ.get("HF_TOKEN")
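The only functional change in this first hunk is the new torch.nn.functional import, which is used further down to turn the 'Yes'/'No' logits into probabilities with a softmax. The model-loading call itself is unchanged; only its tail (token=token, and the closing parenthesis) is visible as context in the next hunk. For orientation, a minimal sketch of what such a loading block typically looks like follows; the checkpoint name and the 4-bit quantization settings are assumptions for illustration, since this commit does not show them:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

token = os.environ.get("HF_TOKEN")
model_id = "google/shieldgemma-2b"  # placeholder; the Space's actual checkpoint is not visible in this diff

# BitsAndBytesConfig is imported by app.py, so quantized loading is a plausible setup (assumption)
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    token=token,
)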
@@ -26,92 +27,72 @@ model = AutoModelForCausalLM.from_pretrained(
     token=token,
 )
 
-…
-    * "No Hate Speech": The prompt shall not contain or seek generation of content that targets identity and/or protected attributes (e.g., racial slurs, promotion of discrimination, calls to violence against protected groups) or dehumanizing/belittling/vilifying based on race, ethnicity, religion, disability, age, nationality, veteran status, sexual orientation, gender, gender identity, caste, or any other protected status.
-    * "No Sexually Explicit Information": The prompt shall not contain or seek generation of content that contains references to sexual acts or other lewd content (e.g., sexually graphic descriptions, content aimed at causing arousal). Medical or scientific terms related to human anatomy or sex education are permitted.
-    """
-    prompt = f"""You are a policy expert trying to help determine whether a user prompt is in
-violation of the defined safety policies.
-<start_of_turn>
-Human Question: {message.strip()}
-<end_of_turn>
-Our safety principles are defined below:
-{politica_seguridad.strip()}
-Does the human question violate any of the above principles? Your answer must start with
-'Yes' or 'No'. Then walk through step by step to be sure we answer correctly.
-"""
+# Define the 'system_message' directly in the code
+system_message = "You are a friendly Chatbot."
+
+# Function to build the prompt including the safety policies
+def generar_prompt(user_prompt):
+    safety_policy = """
+    * "No Dangerous Content": The prompt shall not contain or seek generation of content that harms oneself and/or others (e.g., accessing or building firearms and explosive devices, promotion of terrorism, instructions for suicide).
+    * "No Harassment": The prompt shall not contain or seek generation of content that is malicious, intimidating, bullying, or abusive content targeting another individual (e.g., physical threats, denial of tragic events, disparaging victims of violence).
+    * "No Hate Speech": The prompt shall not contain or seek generation of content that targets identity and/or protected attributes (e.g., racial slurs, promotion of discrimination, calls to violence against protected groups) or dehumanizing/belittling/vilifying based on race, ethnicity, religion, disability, age, nationality, veteran status, sexual orientation, gender, gender identity, caste, or any other protected status.
+    * "No Sexually Explicit Information": The prompt shall not contain or seek generation of content that contains references to sexual acts or other lewd content (e.g., sexually graphic descriptions, content aimed at causing arousal). Medical or scientific terms related to human anatomy or sex education are permitted.
+    """
+    prompt = f"""You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.
+
+<start_of_turn>
+Human Question: {user_prompt.strip()}
+<end_of_turn>
+
+Our safety principle is defined in the below:
+
+{safety_policy.strip()}
+
+Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.
+"""
     return prompt
 
 @spaces.GPU(duration=150)
-…
-    # Compute the probabilities
-    probabilities =
+# Function to process the response and check the policies
+def respond(message, max_tokens, temperature, top_p):
+    prompt = generar_prompt(message)
+    inputs = tokenizer(prompt, return_tensors="pt")
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
+
+    # Get the vocabulary
+    vocab = tokenizer.get_vocab()
+    # Token IDs for 'Yes' and 'No'
+    yes_token_id = vocab.get('Yes')
+    no_token_id = vocab.get('No')
+
+    # Check that the tokens exist in the vocabulary
+    if yes_token_id is None or no_token_id is None:
+        raise ValueError("Los tokens 'Yes' o 'No' no se encontraron en el vocabulario.")
+
+    # Extract the logits for 'Yes' and 'No'
+    selected_logits = logits[0, -1, [yes_token_id, no_token_id]]
+
+    # Compute the probabilities with softmax
+    probabilities = F.softmax(selected_logits, dim=0)
+
+    # Probability of 'Yes' and 'No'
     yes_probability = probabilities[0].item()
     no_probability = probabilities[1].item()
-    # Print the probabilities
     print(f"Yes probability: {yes_probability}")
     print(f"No probability: {no_probability}")
 
-    # Decide whether the policies are violated
+    # Decide whether the policies are violated, based on the 'Yes' probability
     if yes_probability > no_probability:
-        if language == "Español":
-            violation_message = "Su pregunta viola las políticas aceptadas."
-        else:
-            violation_message = "Your question violates the accepted policies."
+        violation_message = "Your question violates the accepted policies."
         return violation_message
     else:
-        print("Decisión: No (no viola las políticas)")
         # Generate the reply to the user
-…
-        else:
-            assistant_prompt = f"{system_message}\nUser: {message}\nAssistant:"
-        inputs = tokenizer(assistant_prompt, return_tensors="pt").to("cpu")
+        assistant_prompt = f"{system_message}\nUser: {message}\nAssistant:"
+        inputs = tokenizer(assistant_prompt, return_tensors="pt")
         outputs = model.generate(
             **inputs,
             max_new_tokens=max_tokens,
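This hunk is the heart of the change. Instead of asking the model to generate a full verdict and parsing the text, the new respond() runs a single forward pass over the policy-check prompt and compares the next-token probabilities of 'Yes' and 'No'. Because the template instructs the model that its answer must start with 'Yes' or 'No', the probability mass on those two tokens at the last prompt position acts as a binary safety classifier. A stripped-down sketch of just that scoring step is shown below; the checkpoint name is a placeholder, the sketch reuses the generar_prompt() helper added above, and it assumes 'Yes' and 'No' are single entries in the tokenizer vocabulary (which the app verifies with vocab.get):

import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "google/shieldgemma-2b"  # placeholder checkpoint for illustration
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Build the policy-check prompt (generar_prompt is the helper added in the hunk above)
prompt = generar_prompt("example user message")
inputs = tokenizer(prompt, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits  # shape: (batch, seq_len, vocab_size)

vocab = tokenizer.get_vocab()
yes_id, no_id = vocab["Yes"], vocab["No"]  # assumed to be single tokens

# Next-token logits at the last prompt position, restricted to 'Yes' / 'No'
selected_logits = logits[0, -1, [yes_id, no_id]]
yes_prob, no_prob = F.softmax(selected_logits, dim=0).tolist()

violates_policy = yes_prob > no_prob

One practical note: the committed code keeps the tokenized inputs on the CPU; if the model ends up on a GPU under @spaces.GPU, the tensors would normally be moved first, for example with inputs = inputs.to(model.device), before the forward pass.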
@@ -120,20 +101,14 @@ def respond(message, language, system_message, max_tokens, temperature, top_p):
             do_sample=True,
         )
         assistant_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-…
-            assistant_reply = assistant_response.split("Asistente:")[-1].strip()
-        else:
-            assistant_reply = assistant_response.split("Assistant:")[-1].strip()
+        assistant_reply = assistant_response.split("Assistant:")[-1].strip()
         return assistant_reply
 
 # Create the Gradio interface using Blocks
 with gr.Blocks() as demo:
-    gr.Markdown("# Chatbot
-    language = gr.Dropdown(choices=["English", "Español"], value="English", label="Idioma/Language")
-    system_message = "You are a friendly Chatbot."
+    gr.Markdown("# Child-Safe-Chatbot")
 
-    with gr.Accordion("
+    with gr.Accordion("Advanced", open=False):
         max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
         temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
         top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
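The old code chose the split marker ("Asistente:" or "Assistant:") from the language dropdown; with the language selector removed, the reply is always extracted after "Assistant:". The split is needed because tokenizer.decode() returns the prompt and the completion together. A tiny illustration with a made-up decoded string:

# tokenizer.decode(outputs[0], skip_special_tokens=True) yields prompt + completion, e.g.:
decoded = "You are a friendly Chatbot.\nUser: Hi!\nAssistant: Hello! How can I help you today?"

# Everything after the last "Assistant:" marker is the newly generated reply
assistant_reply = decoded.split("Assistant:")[-1].strip()
print(assistant_reply)  # Hello! How can I help you today?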
@@ -142,23 +117,23 @@ with gr.Blocks() as demo:
     message = gr.Textbox(label="Your message")
     submit_button = gr.Button("Send")
 
-    def submit_message(user_message, chat_history, max_tokens, temperature, top_p
+    def submit_message(user_message, chat_history, max_tokens, temperature, top_p):
         chat_history = chat_history + [[user_message, None]]
-        assistant_reply = respond(
+        assistant_reply = respond(
+            user_message, max_tokens, temperature, top_p
+        )
         chat_history[-1][1] = assistant_reply
         return "", chat_history
 
     submit_button.click(
         submit_message,
-        inputs=[message, chatbot, max_tokens, temperature, top_p
+        inputs=[message, chatbot, max_tokens, temperature, top_p],
         outputs=[message, chatbot],
     )
     message.submit(
         submit_message,
-        inputs=[message, chatbot, max_tokens, temperature, top_p
+        inputs=[message, chatbot, max_tokens, temperature, top_p],
         outputs=[message, chatbot],
     )
 
 demo.launch(debug=True)
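For readers less familiar with Gradio Blocks: passing the chatbot component in inputs hands the current chat history (a list of [user, assistant] pairs) to submit_message, and returning ("", chat_history) clears the textbox and refreshes the chat in one step. The chatbot = gr.Chatbot() definition itself lives in an unchanged part of app.py and is not shown in this diff. A minimal self-contained sketch of the same wiring, with the model-backed respond() stubbed out:

import gradio as gr

def respond(message, max_tokens, temperature, top_p):
    # Stand-in for the model-backed respond() defined in app.py
    return f"(echo) {message}"

with gr.Blocks() as demo:
    gr.Markdown("# Child-Safe-Chatbot")

    with gr.Accordion("Advanced", open=False):
        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    chatbot = gr.Chatbot()  # assumed; defined in an unchanged part of app.py
    message = gr.Textbox(label="Your message")
    submit_button = gr.Button("Send")

    def submit_message(user_message, chat_history, max_tokens, temperature, top_p):
        chat_history = chat_history + [[user_message, None]]
        chat_history[-1][1] = respond(user_message, max_tokens, temperature, top_p)
        return "", chat_history

    submit_button.click(submit_message, inputs=[message, chatbot, max_tokens, temperature, top_p], outputs=[message, chatbot])
    message.submit(submit_message, inputs=[message, chatbot, max_tokens, temperature, top_p], outputs=[message, chatbot])

demo.launch(debug=True)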