Spaces:
Build error
Build error
jijivski
committed on
Commit
•
e2bf898
1
Parent(s):
8b7042b
dynamically add choices
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ import pdb
|
|
6 |
from types import SimpleNamespace
|
7 |
import pandas as pd
|
8 |
import plotly.express as px
|
|
|
|
|
9 |
# os.system('git clone https://github.com/EleutherAI/lm-evaluation-harness')
|
10 |
# os.system('cd lm-evaluation-harness')
|
11 |
# os.system('pip install -e .')
|
@@ -95,7 +97,52 @@ def plotly_plot():#(df, x, y, color,title, x_title, y_title):
|
|
95 |
# fig.update_yaxes(title_text=y_title)
|
96 |
# return fig
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
|
101 |
def color_pipeline(texts=["Hi","FreshEval","!"], model=None):
|
@@ -151,6 +198,11 @@ with gr.Blocks() as demo:
|
|
151 |
|
152 |
# markdown
|
153 |
gr.Markdown('### How to use this app')
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
|
156 |
|
|
|
6 |
from types import SimpleNamespace
|
7 |
import pandas as pd
|
8 |
import plotly.express as px
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
import numpy as np
|
11 |
# os.system('git clone https://github.com/EleutherAI/lm-evaluation-harness')
|
12 |
# os.system('cd lm-evaluation-harness')
|
13 |
# os.system('pip install -e .')
|
|
|
97 |
# fig.update_yaxes(title_text=y_title)
|
98 |
# return fig
|
99 |
|
100 |
+
def show_attention_plot(model_name, texts):
    """Plot the last-layer, first-head attention matrix as a heat map.

    The tokenizer and model are read from the dictionary returned by
    ``run_get_loss`` (keys ``'tokenizer'`` and ``'model'``). The text is run
    through the model with ``output_attentions=True`` and the attention
    weights of the first sample / first head of the last layer are drawn
    with matplotlib.

    Args:
        model_name: Model identifier, forwarded to ``run_get_loss`` as
            ``args.model``.
        texts: Text to visualise, forwarded to ``run_get_loss`` as
            ``args.texts``. A string is used as-is; any other iterable is
            joined with spaces. When the result is blank, a fixed sample
            sentence is plotted instead.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure is the
        freshly drawn heat map.

    Raises:
        ValueError: If the model output contains no attention weights.
    """
    args = SimpleNamespace(texts=texts, model=model_name)
    print(f'L60,text:{texts}')
    rtn_dic = run_get_loss(args)
    # rtn_dic is expected to carry the loaded tokenizer and model next to the
    # loss data, so the model does not have to be loaded a second time here.
    tokenizer, model = rtn_dic['tokenizer'], rtn_dic['model']

    # Plot the caller's text; fall back to the sample sentence only when
    # nothing usable was supplied.
    if isinstance(texts, str):
        text = texts
    else:
        text = " ".join(str(piece) for piece in (texts or ()))
    if not text.strip():
        text = "Here is some text to encode"

    # Tokenize the input text and run a forward pass that returns attentions.
    inputs = tokenizer(text, return_tensors="pt")
    outputs = model(**inputs, output_attentions=True)

    # Fail loudly instead of dropping into a debugger, which would block the
    # app, and instead of continuing with an undefined attention tensor.
    if "attentions" not in outputs:
        raise ValueError("Attention matrix not found in outputs.")
    last_layer_attentions = outputs.attentions[-1]  # last layer's attention weights
    print("Successfully retrieved the attention matrix:", last_layer_attentions.shape)

    # last_layer_attentions is assumed to be shaped
    # [batch_size, num_heads, seq_length, seq_length]; only the first sample
    # and first head are shown to keep the plot simple.
    # .float().cpu() makes the conversion work for half/bfloat16 tensors and
    # for tensors that live on an accelerator.
    attention_matrix = last_layer_attentions[0, 0].detach().float().cpu().numpy()

    # Draw the heat map with matplotlib.
    plt.figure(figsize=(10, 8))
    plt.imshow(attention_matrix, cmap='viridis')

    # Title and axis labels for readability.
    plt.title('Attention Matrix Visualization')
    plt.xlabel('Tokens in Sequence')
    plt.ylabel('Tokens in Sequence')

    # Colour bar showing the attention-weight scale.
    plt.colorbar()

    return plt
|
146 |
|
147 |
|
148 |
def color_pipeline(texts=["Hi","FreshEval","!"], model=None):
|
|
|
198 |
|
199 |
# markdown
|
200 |
gr.Markdown('### How to use this app')
|
201 |
+
|
202 |
+
|
203 |
+
attention_plot=gr.Plot(label='attention plot')
|
204 |
+
see_attention_button = gr.Button("see attention").click(show_attention_plot,inputs=[model_input, text_input],outputs=[attention_plot])
|
205 |
+
|
206 |
|
207 |
|
208 |
|
get_loss/get_loss_hf.py
CHANGED
@@ -129,10 +129,12 @@ def load_hf_model(path, cache_path):
|
|
129 |
device_map=device,
|
130 |
trust_remote_code=True,
|
131 |
cache_dir=cache_path).eval()
|
|
|
132 |
else:
|
133 |
hf_model = AutoModelForCausalLM.from_pretrained(path,
|
134 |
device_map=device,
|
135 |
-
trust_remote_code=True
|
|
|
136 |
hf_tokenizer = AutoTokenizer.from_pretrained(path)
|
137 |
|
138 |
print_model_parameters_in_billions(hf_model)
|
@@ -227,7 +229,8 @@ def eval_hf_model(model, tokenizer, texts, chunk_size):
|
|
227 |
|
228 |
# print(f'log probability sum: {sum(data) / len(data):.2f}')
|
229 |
# print(f'avg tokens: {sum(token_length_list) / len(token_length_list):.0f}')
|
230 |
-
rtn_dic={'logit':logit.cpu().numpy(),'input_ids':input_chunk.cpu().numpy()[0],
|
|
|
231 |
return rtn_dic
|
232 |
|
233 |
|
|
|
129 |
device_map=device,
|
130 |
trust_remote_code=True,
|
131 |
cache_dir=cache_path).eval()
|
132 |
+
# output_attentions=True)
|
133 |
else:
|
134 |
hf_model = AutoModelForCausalLM.from_pretrained(path,
|
135 |
device_map=device,
|
136 |
+
trust_remote_code=True,
|
137 |
+
output_attentions=True).eval()
|
138 |
hf_tokenizer = AutoTokenizer.from_pretrained(path)
|
139 |
|
140 |
print_model_parameters_in_billions(hf_model)
|
|
|
229 |
|
230 |
# print(f'log probability sum: {sum(data) / len(data):.2f}')
|
231 |
# print(f'avg tokens: {sum(token_length_list) / len(token_length_list):.0f}')
|
232 |
+
rtn_dic={'logit':logit.cpu().numpy(),'input_ids':input_chunk.cpu().numpy()[0],
|
233 |
+
'loss':loss,'tokenizer':tokenizer,'neg_log_prob_temp':neg_log_prob_temp,'model':model}
|
234 |
return rtn_dic
|
235 |
|
236 |
|
gradio_samples/add_components.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# def words():
|
3 |
+
# sentence = "A test of Gradio"
|
4 |
+
# words = sentence.split()
|
5 |
+
# update_show = [gr.Button.update(visible=True, value=w) for w in words]
|
6 |
+
# update_hide = [gr.Button.update(visible=False, value="") for _ in range(10-len(words))]
|
7 |
+
# return update_show + update_hide
|
8 |
+
|
9 |
+
|
10 |
+
# import gradio as gr
|
11 |
+
|
12 |
+
# def words():
|
13 |
+
# sentence = "A test of Gradio"
|
14 |
+
# words = sentence.split()
|
15 |
+
# update_show = [gr.Button(visible=True, value=w) for w in words]
|
16 |
+
# update_hide = [gr.Button(visible=False, value="") for _ in range(10-len(words))]
|
17 |
+
# return update_show + update_hide
|
18 |
+
|
19 |
+
# btn_list = []
|
20 |
+
|
21 |
+
# with gr.Blocks() as demo:
|
22 |
+
# with gr.Tab():
|
23 |
+
# for i in range(10):
|
24 |
+
# btn = gr.Button(visible=False)
|
25 |
+
# btn_list.append(btn)
|
26 |
+
# b = gr.Button("Run")
|
27 |
+
# b.click(words, None, btn_list)
|
28 |
+
|
29 |
+
# demo.launch()
|
30 |
+
|
31 |
+
|
32 |
+
|
33 |
+
import gradio as gr
|
34 |
+
|
35 |
+
def words():
    """Return ten textbox updates: one visible box per word, the rest hidden.

    The sample sentence is split on whitespace; each word becomes a visible,
    editable textbox and the remaining slots (up to ten in total) become
    hidden, empty, editable textboxes.
    """
    tokens = "A test of Gradio".split()
    shown = [
        gr.Textbox(visible=True, value=token, interactive=True)
        for token in tokens
    ]
    hidden = [
        gr.Textbox(visible=False, value="", interactive=True)
        for _ in range(10 - len(tokens))
    ]
    return [*shown, *hidden]
|
41 |
+
|
42 |
+
|
43 |
+
def get_text_content(*btn_list):
    """Join the non-empty textbox values into one space-separated string.

    Args:
        *btn_list: The current values of the textboxes, one positional
            argument per box. Empty strings and ``None`` are skipped so
            that unused slots do not add stray spaces to the result.

    Returns:
        The remaining values joined by single spaces ("" when there are
        none). The result is also printed for debugging.
    """
    # Skip empty/None slots: joining them would add trailing or double
    # spaces, and None would make str.join raise TypeError.
    rtn = ' '.join(w for w in btn_list if w)

    print(rtn)
    return rtn
|
52 |
+
|
53 |
+
|
54 |
+
# Pool of textbox components, filled while the layout below is built.
btn_list = []

# NOTE(review): indentation was lost in this view; the nesting below (the Row
# wrapping only the textboxes) is the assumed layout — confirm against the
# original file.
with gr.Blocks() as demo:
    with gr.Row():
        # Pre-create ten hidden textboxes; words() returns one update per box.
        for i in range(10):
            btn = gr.Textbox(visible=False)
            btn_list.append(btn)
    # "Run" takes no inputs and sends the list returned by words() to the textboxes.
    b = gr.Button("Run")
    b.click(words, None, btn_list)

    # "Get Text Content" passes every textbox value to get_text_content and
    # shows the returned string in the output textbox.
    b = gr.Button("Get Text Content")
    output = gr.Textbox()
    b.click(get_text_content, btn_list, output)

demo.launch(debug=True)
|
diff_color.py → gradio_samples/diff_color.py
RENAMED
File without changes
|
hello_test.py → gradio_samples/hello_test.py
RENAMED
File without changes
|
hf_space_test.py → gradio_samples/hf_space_test.py
RENAMED
File without changes
|
plot.py → gradio_samples/plot.py
RENAMED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import altair
|
2 |
|
3 |
import gradio as gr
|
@@ -20,7 +21,7 @@ def outbreak(plot_type, r, month, countries, social_distancing):
|
|
20 |
# df = pd.DataFrame({"day": x})
|
21 |
# for country in countries:
|
22 |
# df[country] = x ** (r) * (pop_count[country] + 1)
|
23 |
-
df=pd.read_csv('
|
24 |
print(df.head())
|
25 |
# pdb.set_trace()
|
26 |
|
|
|
1 |
+
# NOTE: this sample does not seem to work here...
|
2 |
import altair
|
3 |
|
4 |
import gradio as gr
|
|
|
21 |
# df = pd.DataFrame({"day": x})
|
22 |
# for country in countries:
|
23 |
# df[country] = x ** (r) * (pop_count[country] + 1)
|
24 |
+
df=pd.read_csv('../data/ob.csv')
|
25 |
print(df.head())
|
26 |
# pdb.set_trace()
|
27 |
|