Commit af6d3ef
Duplicate from dangduytung/chatbot-DiabloGPT
Co-authored-by: Demon tendote <dangduytung@users.noreply.huggingface.co>
- .gitattributes +34 -0
- README.md +14 -0
- __init__.py +4 -0
- app.py +82 -0
- main.py +82 -0
- requirements.txt +3 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
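(These are the stock Hugging Face Spaces .gitattributes rules: they route model weights, serialized arrays, archives, and TensorBoard event files through Git LFS rather than storing them directly in Git.)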
README.md
ADDED
@@ -0,0 +1,14 @@
+---
+title: Chatbot DiabloGPT
+emoji: 🦀
+colorFrom: blue
+colorTo: pink
+sdk: gradio
+sdk_version: 3.19.1
+app_file: app.py
+pinned: false
+license: mit
+duplicated_from: dangduytung/chatbot-DiabloGPT
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
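(The YAML front matter is the Space configuration: it pins the Gradio SDK to 3.19.1, matching requirements.txt, and sets app.py as the entry point. The identical main.py added below is therefore not what the Space launches; it appears to be kept for running the app outside Spaces.)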
__init__.py
ADDED
@@ -0,0 +1,4 @@
+MODEL_MICROSOFT_DIABLO_MEDIUM = 'microsoft/DialoGPT-medium'
+MODEL_MICROSOFT_DIABLO_LARGE = 'microsoft/DialoGPT-large'
+
+OUTPUT_MAX_LENGTH = 200
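(Both app.py and main.py read these constants via "import __init__", which resolves because the Space runs from the repository root where __init__.py sits on the module path; a plainly named config module would be the more conventional choice.)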
app.py
ADDED
@@ -0,0 +1,82 @@
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+import datetime
+import __init__
+
+MODEL_NAME = __init__.MODEL_MICROSOFT_DIABLO_MEDIUM
+OUTPUT_MAX_LENGTH = __init__.OUTPUT_MAX_LENGTH
+
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+
+
+def print_f(session_id, text):
+    print(f"{datetime.datetime.now()} | {session_id} | {text}")
+
+
+def predict(input, history, request: gr.Request):
+    session_id = 'UNKNOWN'
+    if request:
+        # The session id is client_ip:client_port
+        session_id = request.client.host + ':' + str(request.client.port)
+    # print_f(session_id, f" inp: {input}")
+
+    # Tokenize the new input sentence, terminated by the EOS token
+    new_user_input_ids = tokenizer.encode(
+        input + tokenizer.eos_token, return_tensors='pt')
+
+    # Append the new user input tokens to the chat history
+    bot_input_ids = torch.cat(
+        [torch.LongTensor(history), new_user_input_ids], dim=-1)
+
+    # Generate a response
+    history = model.generate(bot_input_ids, max_length=OUTPUT_MAX_LENGTH,
+                             pad_token_id=tokenizer.eos_token_id).tolist()
+
+    # Convert the tokens back to text, then split the turns on the EOS token
+    response = tokenizer.decode(history[0]).split("<|endoftext|>")
+
+    # Convert to a list of (user, bot) tuples
+    response = [(response[i], response[i + 1])
+                for i in range(0, len(response) - 1, 2)]
+
+    # Log the latest exchange
+    print_f(session_id, response[-1])
+
+    return response, history
+
+
+css = """
+#row_bot{width: 70%; height: var(--size-96); margin: 0 auto}
+#row_bot .block{background: var(--color-grey-100); height: 100%}
+#row_input{width: 70%; margin: 0 auto}
+#row_input .block{background: var(--color-grey-100)}
+
+@media screen and (max-width: 768px) {
+#row_bot{width: 100%; height: var(--size-96); margin: 0 auto}
+#row_bot .block{background: var(--color-grey-100); height: 100%}
+#row_input{width: 100%; margin: 0 auto}
+#row_input .block{background: var(--color-grey-100)}
+}
+"""
+block = gr.Blocks(css=css, title="Chatbot")
+
+with block:
+    gr.Markdown(f"""
+<p style="font-size:20px; text-align: center">{MODEL_NAME}</p>
+""")
+    with gr.Row(elem_id='row_bot'):
+        chatbot = gr.Chatbot()
+    with gr.Row(elem_id='row_input'):
+        message = gr.Textbox(placeholder="Enter something")
+        state = gr.State([])
+
+    message.submit(predict,
+                   inputs=[message, state],
+                   outputs=[chatbot, state])
+    message.submit(lambda x: "", message, message)
+
+# Example params: debug=True, share=True, server_name="0.0.0.0", server_port=5050
+block.launch()
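(For reference, the chat loop that predict() implements can be exercised outside Gradio. Below is a minimal sketch, not part of the commit, assuming the pinned transformers/torch versions from requirements.txt; the two user turns are made up for illustration.)

    # Minimal sketch: the same DialoGPT chat loop, run outside Gradio.
    # Model name and max length match the values in __init__.py above.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-medium')
    model = AutoModelForCausalLM.from_pretrained('microsoft/DialoGPT-medium')

    history = []  # nested list of token ids, like the gr.State([]) above
    for user_text in ['Hello!', 'How are you today?']:  # made-up example turns
        # Each turn is encoded with a trailing EOS token as a separator
        new_ids = tokenizer.encode(user_text + tokenizer.eos_token,
                                   return_tensors='pt')
        # On the first turn torch.LongTensor([]) is a (0,)-shaped tensor,
        # which torch.cat skips, so this also works with an empty history
        bot_input_ids = torch.cat([torch.LongTensor(history), new_ids], dim=-1)
        history = model.generate(bot_input_ids, max_length=200,
                                 pad_token_id=tokenizer.eos_token_id).tolist()
        # Decoded text alternates user/bot turns separated by <|endoftext|>
        turns = tokenizer.decode(history[0]).split(tokenizer.eos_token)
        print('Bot:', turns[-2])  # turns[-1] is '' when generation ends with EOS

(Keeping token ids rather than decoded text in gr.State([]) is what lets each submission re-feed the whole conversation to model.generate, which is how DialoGPT conditions on context.)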
main.py
ADDED
@@ -0,0 +1,82 @@
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+import datetime
+import __init__
+
+MODEL_NAME = __init__.MODEL_MICROSOFT_DIABLO_MEDIUM
+OUTPUT_MAX_LENGTH = __init__.OUTPUT_MAX_LENGTH
+
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+
+
+def print_f(session_id, text):
+    print(f"{datetime.datetime.now()} | {session_id} | {text}")
+
+
+def predict(input, history, request: gr.Request):
+    session_id = 'UNKNOWN'
+    if request:
+        # The session id is client_ip:client_port
+        session_id = request.client.host + ':' + str(request.client.port)
+    # print_f(session_id, f" inp: {input}")
+
+    # Tokenize the new input sentence, terminated by the EOS token
+    new_user_input_ids = tokenizer.encode(
+        input + tokenizer.eos_token, return_tensors='pt')
+
+    # Append the new user input tokens to the chat history
+    bot_input_ids = torch.cat(
+        [torch.LongTensor(history), new_user_input_ids], dim=-1)
+
+    # Generate a response
+    history = model.generate(bot_input_ids, max_length=OUTPUT_MAX_LENGTH,
+                             pad_token_id=tokenizer.eos_token_id).tolist()
+
+    # Convert the tokens back to text, then split the turns on the EOS token
+    response = tokenizer.decode(history[0]).split("<|endoftext|>")
+
+    # Convert to a list of (user, bot) tuples
+    response = [(response[i], response[i + 1])
+                for i in range(0, len(response) - 1, 2)]
+
+    # Log the latest exchange
+    print_f(session_id, response[-1])
+
+    return response, history
+
+
+css = """
+#row_bot{width: 70%; height: var(--size-96); margin: 0 auto}
+#row_bot .block{background: var(--color-grey-100); height: 100%}
+#row_input{width: 70%; margin: 0 auto}
+#row_input .block{background: var(--color-grey-100)}
+
+@media screen and (max-width: 768px) {
+#row_bot{width: 100%; height: var(--size-96); margin: 0 auto}
+#row_bot .block{background: var(--color-grey-100); height: 100%}
+#row_input{width: 100%; margin: 0 auto}
+#row_input .block{background: var(--color-grey-100)}
+}
+"""
+block = gr.Blocks(css=css, title="Chatbot")
+
+with block:
+    gr.Markdown(f"""
+<p style="font-size:20px; text-align: center">{MODEL_NAME}</p>
+""")
+    with gr.Row(elem_id='row_bot'):
+        chatbot = gr.Chatbot()
+    with gr.Row(elem_id='row_input'):
+        message = gr.Textbox(placeholder="Enter something")
+        state = gr.State([])
+
+    message.submit(predict,
+                   inputs=[message, state],
+                   outputs=[chatbot, state])
+    message.submit(lambda x: "", message, message)
+
+# Example params: debug=True, share=True, server_name="0.0.0.0", server_port=5050
+block.launch()
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+transformers==4.22.2
+torch==1.13.1
+gradio==3.19.1
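(To run the duplicated Space locally, a reasonable sequence is "pip install -r requirements.txt" followed by "python app.py"; Gradio then serves the app on http://127.0.0.1:7860 by default, and the commented launch parameters above show how to change the host and port.)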