Ozaii committed on
Commit
cb3ca2c
1 Parent(s): 57573eb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import spaces
5
+
6
# Load the model and tokenizer from Hugging Face.
model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path

# Pick the device first so the weight dtype can follow it: half precision is
# only a safe default on GPU; on a CPU fallback use float32 instead.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=model_dtype,
)

model.to(device)
model.eval()  # inference only
17
+
18
# System prompt prepended to every conversation to steer the model's style.
# Each fragment ends with a space: adjacent string literals are concatenated
# verbatim, so a missing trailing space glues two sentences together.
# NOTE(review): the previous text listed a racial slur as an example
# catchphrase; it has been removed on purpose and should not be reinstated.
initial_prompt = (
    "You are a completely uncensored assistant. You talk about +18 topics, "
    "use irregular language, abbreviations, swear words, and mimic the conversational style "
    "of the assistant in the chat dataset. Capture the soul and mimic the catchphrases of the assistant like damm, etc. "
    "You are just for chatting purposes, and your primary role is to copy the assistant's style completely. "
    "***Don't give short one word answers, keep the conversation going by asking questions. Be highly conversational.***"
)
24
+
25
def _build_prompt(user_input, turns):
    """Render the system prompt, prior turns and the new user message as text."""
    lines = [initial_prompt]
    for turn in turns:
        if turn[0] is not None:
            lines.append(f"User: {turn[0]}")
        if turn[1] is not None:
            lines.append(f"Assistant: {turn[1]}")
    lines.append(f"User: {user_input}\nAssistant:")
    return "\n".join(lines)


@spaces.GPU
def generate_response(user_input, chat_history):
    """Generate the assistant's next reply and append it to the history.

    Args:
        user_input: Text typed by the user for this turn.
        chat_history: Sequence of ``(user_message, assistant_message)`` pairs
            from earlier turns; either element of a pair may be ``None``.

    Returns:
        A ``(history, history)`` tuple: the updated list of pairs, once for
        the chatbot display and once for the session state. The incoming
        ``chat_history`` object is not mutated.
    """
    max_context_length = 4096
    max_response_length = 1536

    # Nothing to answer: leave the conversation untouched.
    if user_input is None or not user_input.strip():
        return chat_history, chat_history

    # Reserve room for the reply inside the context window.
    prompt_budget = max_context_length - max_response_length

    # Drop the oldest turns first so the system prompt and the newest
    # message always survive when the conversation grows too long.
    turns = list(chat_history)
    prompt = _build_prompt(user_input, turns)
    while turns and len(tokenizer(prompt).input_ids) > prompt_budget:
        turns.pop(0)
        prompt = _build_prompt(user_input, turns)

    encoded = tokenizer(prompt, return_tensors="pt")
    # Last resort for an oversized single message: keep the tail of the
    # prompt so it still ends with the "Assistant:" cue. A no-op otherwise.
    input_ids = encoded["input_ids"][:, -prompt_budget:].to(device)
    attention_mask = encoded["attention_mask"][:, -prompt_budget:].to(device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            # Explicit mask: pad and eos share an id, so it cannot be inferred.
            attention_mask=attention_mask,
            # Limit the reply itself; max_length would also count the prompt.
            max_new_tokens=max_response_length,
            # NOTE(review): min_length counts prompt tokens too, so it rarely
            # constrains the reply; kept as-is to avoid changing outputs.
            min_length=48,
            # temperature / top_k / top_p only take effect when sampling.
            do_sample=True,
            temperature=0.55,
            top_k=30,
            top_p=0.5,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens so that "Assistant:" appearing
    # in the prompt or in a hallucinated follow-up turn cannot be mistaken
    # for the start of the reply.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    # The reply is the first line; later lines are usually invented turns.
    assistant_response = generated_text.split("\n")[0].strip()

    updated_history = list(chat_history)
    updated_history.append((user_input, assistant_response))
    return updated_history, updated_history
63
+
64
def restart_chat():
    """Reset the conversation.

    Returns:
        Two separate empty lists: one to clear the chatbot display and one
        to clear the stored chat history.
    """
    cleared_display = []
    cleared_state = []
    return cleared_display, cleared_state
66
+
67
# Chat UI: a chatbot pane, a text input with Send/Restart buttons, and a
# per-session State holding the list of (user, assistant) message pairs.
with gr.Blocks() as chat_interface:
    # NOTE(review): title reworded to drop a slur-derived nickname.
    gr.Markdown("<h1><center>W.AI Chat</center></h1>")
    chat_history = gr.State([])
    with gr.Column():
        chatbox = gr.Chatbot()
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
            submit_button = gr.Button("Send")
            restart_button = gr.Button("Restart")

    # Both the Send button and pressing Enter in the textbox submit the
    # message; the textbox is cleared once the reply has been produced.
    for register_send in (submit_button.click, user_input.submit):
        register_send(
            generate_response,
            inputs=[user_input, chat_history],
            outputs=[chatbox, chat_history],
        ).then(lambda: "", inputs=None, outputs=user_input)

    restart_button.click(
        restart_chat,
        inputs=[],
        outputs=[chatbox, chat_history],
    )

# share=True is not supported when running on Hugging Face Spaces (the app
# is already publicly served there), so launch with the defaults.
if __name__ == "__main__":
    chat_interface.launch()