# import gradio as gr
# import torch
# import transformers
# from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# from peft import PeftConfig, PeftModel
# import warnings
# from threading import Thread

# warnings.filterwarnings("ignore")

# PEFT_MODEL = "givyboy/phi-2-finetuned-mental-health-conversational"

# SYSTEM_PROMPT = """Answer the following question truthfully.
#   If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
#   If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""

# USER_PROMPT = lambda x: f"""<HUMAN>: {x}\n<ASSISTANT>: """
# ADD_RESPONSE = lambda x, y: f"""<HUMAN>: {x}\n<ASSISTANT>: {y}"""
# DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=torch.float16,
# )

# config = PeftConfig.from_pretrained(PEFT_MODEL)

# peft_base_model = AutoModelForCausalLM.from_pretrained(
#     config.base_model_name_or_path,
#     return_dict=True,
#     # quantization_config=bnb_config,
#     device_map="auto",
#     trust_remote_code=True,
#     offload_folder="offload/",
#     offload_state_dict=True,
# )

# peft_model = PeftModel.from_pretrained(
#     peft_base_model,
#     PEFT_MODEL,
#     offload_folder="offload/",
#     offload_state_dict=True,
# )
# peft_model = peft_model.to(DEVICE)

# peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# peft_tokenizer.pad_token = peft_tokenizer.eos_token

# pipeline = transformers.pipeline(
#     "text-generation",
#     model=peft_model,
#     tokenizer=peft_tokenizer,
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,
#     device_map="auto",
# )


# # def format_message(message: str, history: list[str], memory_limit: int = 3) -> str:
# #     if len(history) > memory_limit:
# #         history = history[-memory_limit:]

# #     if len(history) == 0:
# #         return f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"

# #     formatted_message = f"{SYSTEM_PROMPT}\n{ADD_RESPONSE(history[0][0], history[0][1])}"

# #     for msg, ans in history[1:]:
# #         formatted_message += f"\n{ADD_RESPONSE(msg, ans)}"

# #     formatted_message += f"\n{USER_PROMPT(message)}"
# #     return formatted_message


# # def get_model_response(message: str, history: list[str]) -> str:
# #     formatted_message = format_message(message, history)
# #     sequences = pipeline(
# #         formatted_message,
# #         do_sample=True,
# #         top_k=10,
# #         num_return_sequences=1,
# #         eos_token_id=peft_tokenizer.eos_token_id,
# #         max_length=600,
# #     )[0]
# #     print(sequences["generated_text"])
# #     output = sequences["generated_text"].split("<ASSISTANT>:")[-1].strip()
# #     # print(f"Response: {output}")
# #     return output


# start_message = ""


# def user(message, history):
#     # Append the user's message to the conversation history
#     return "", history + [[message, ""]]


# def chat(message, history):
#     chat_history = []
#     for item in history:
#         chat_history.append({"role": "user", "content": item[0]})
#         if item[1] is not None:
#             chat_history.append({"role": "assistant", "content": item[1]})

#     message = f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"
#     chat_history.append({"role": "user", "content": message})
#     messages = peft_tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)

#     # Tokenize the messages string
#     model_inputs = peft_tokenizer([messages], return_tensors="pt").to(DEVICE)
#     streamer = transformers.TextIteratorStreamer(
#         peft_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
#     )
#     generate_kwargs = dict(
#         model_inputs,
#         streamer=streamer,
#         max_new_tokens=1024,
#         do_sample=True,
#         top_p=0.95,
#         top_k=1000,
#         temperature=0.75,
#         num_beams=1,
#     )
#     t = Thread(target=peft_model.generate, kwargs=generate_kwargs)
#     t.start()

#     # Initialize an empty string to store the generated text
#     partial_text = ""
#     for new_text in streamer:
#         # print(new_text)
#         partial_text += new_text
#         # Yield the text generated so far so the chat window updates as tokens stream in
#         yield partial_text


# chat = gr.ChatInterface(fn=chat, title="Mental Health Chatbot - by Jayda Hunte")
# chat.launch(share=True)
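# The commented-out code above is an earlier local-inference version that loaded the
# PEFT-tuned phi-2 model with transformers; the active app below instead streams replies
# from a fine-tuned OpenAI chat model via the Chat Completions API.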

import os
from openai import OpenAI
from dotenv import load_dotenv
import gradio as gr

# Load the OpenAI API key from a local .env file and create the API client.
load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")
openai = OpenAI(api_key=API_KEY)

def create_msg(role, content):
    return {"role": role, "content": content}

SYSTEM_PROMPT = create_msg(
    "system",
    """You are a helpful mental health chatbot, please answer with care. If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'. If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.""".strip(),
)
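# For reference, create_msg builds a single OpenAI-style chat message, e.g.
# create_msg("user", "Hello") -> {"role": "user", "content": "Hello"}.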


def predict(message, history):
    # Rebuild the conversation in the OpenAI chat format: system prompt first,
    # then the prior user/assistant turns, then the new user message.
    history_openai_format = [SYSTEM_PROMPT]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    # Request a streamed completion from the fine-tuned model so tokens arrive incrementally.
    response = openai.chat.completions.create(
        model="ft:gpt-3.5-turbo-0613:personal::8kBTG8eh",
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
    )

    # Accumulate the streamed chunks and yield the growing reply so the chat UI updates live.
    partial_message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message += chunk.choices[0].delta.content
            yield partial_message


# Launch the chat UI; share=True also creates a temporary public Gradio link.
gr.ChatInterface(fn=predict, title="Mental Health Chatbot - by Jayda Hunte").launch(share=True)