xycold's picture
Update app.py
cd29413 verified
raw
history blame
3.74 kB
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import tempfile
import warnings
# Suppress every UserWarning process-wide (the filter is not limited to one module).
warnings.filterwarnings(action='ignore', category=UserWarning)
# Configure fonts so the Chinese axis labels and titles can be rendered.
# SimHei stays first; the remaining families are fallbacks for hosts where
# SimHei is not installed (it normally ships only with Windows). DejaVu Sans
# is matplotlib's bundled default and keeps Latin text working as a last resort.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
    'DejaVu Sans',
]
# Draw the minus sign as ASCII '-' instead of U+2212 so negative tick labels
# still render with fonts that lack that glyph.
plt.rcParams['axes.unicode_minus'] = False
def process_file(file):
    """Load the uploaded table and prepare the column-selection widgets.

    The file is parsed as CSV or Excel according to its extension. Every
    column except the last one is offered as a feature column; the last
    column is offered as the label column.

    Args:
        file: Uploaded file object. Only its ``name`` attribute (used as the
            path to read) is accessed.

    Returns:
        A 5-tuple, in this order: an update for the feature-column dropdown
        (choices, initial value, visible), an update for the label-column
        dropdown (choices, value, visible), two plain ``visible=True``
        updates, and the first five rows of the table.

    Raises:
        gr.Error: If the extension is not ``.csv``, ``.xlsx`` or ``.xls``, or
            if the table has fewer than two columns.
    """
    path = file.name
    # Match the extension case-insensitively so "DATA.CSV" is accepted too.
    lowered = path.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith(('.xlsx', '.xls')):
        df = pd.read_excel(path)
    else:
        # Returning a bare string would not match the 5-tuple this handler
        # must produce; raising surfaces the message to the user instead.
        raise gr.Error("不支持的数据文件格式。")

    columns = df.columns.tolist()
    if len(columns) < 2:
        # With fewer than two columns there is no feature column to select,
        # and indexing feature_columns[0] below would raise IndexError.
        raise gr.Error("数据文件至少需要包含两列。")

    feature_columns = columns[:-1]
    last_column = [columns[-1]]
    # Populate both dropdowns, reveal the dependent widgets and return a
    # five-row preview of the data.
    return (
        gr.update(choices=feature_columns, value=feature_columns[0], visible=True),
        gr.update(choices=last_column, value=last_column[-1], visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
        df.head(),
    )
def update_slider(choice):
    """Return a visibility update that is shown only when ``choice`` is "是"."""
    show_slider = (choice == "是")
    return gr.update(visible=show_slider)
def generate_output(file, column1, column2, choice, bins):
    """Plot the distribution of ``column2`` across the values of ``column1``.

    The table is re-read from the uploaded file, ``column1`` is optionally
    bucketed into quantile bins, the two columns are cross-tabulated, and the
    counts are drawn as a grouped bar chart saved to a PNG file.

    Args:
        file: Uploaded file object. Only its ``name`` attribute (used as the
            path to read) is accessed.
        column1: Name of the feature column (x axis).
        column2: Name of the label column (one bar series per distinct value).
        choice: "是" to treat ``column1`` as continuous and bin it with
            ``pd.qcut``; any other value uses the column as-is.
        bins: Requested number of quantile bins; also determines the figure
            width (``bins * 2`` inches).

    Returns:
        A 5-tuple, in this order: the first five rows of the table, a
        ``visible=True`` update, the path of the saved PNG, and two more
        ``visible=True`` updates.
    """
    path = file.name
    # Pick the reader from the extension; an Excel upload cannot be parsed
    # by read_csv.
    if path.lower().endswith(('.xlsx', '.xls')):
        df = pd.read_excel(path)
    else:
        df = pd.read_csv(path)

    data_x = df[column1]
    data_y = df[column2]
    # qcut needs an integer bin count; coerce in case a float is passed in.
    bins = int(bins)

    if choice == "是":
        # Continuous feature: bucket into quantile bins. Duplicate bin edges
        # are dropped, so fewer than ``bins`` groups may result.
        data_x = pd.qcut(data_x, q=bins, duplicates='drop')

    # Count rows for every (feature value or bin, label value) pair.
    counts = pd.crosstab(data_x, data_y)

    # Create the sized figure FIRST and draw into its axes. Opening a new
    # figure after plotting would leave the bars on a different figure from
    # the one that gets labelled and saved.
    fig, ax = plt.subplots(figsize=(bins * 2, 6))
    try:
        # rot=0 keeps the x tick labels horizontal.
        counts.plot(kind='bar', ax=ax, rot=0)
        ax.set_xlabel(column1, fontsize=12)
        ax.set_ylabel(column2, fontsize=12)
        ax.set_title(f'{column1}{column2}的关系', fontsize=14)
        # Write to a unique temp file so concurrent requests do not overwrite
        # each other's image. delete=False: the file must outlive this call.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            image_path = tmp.name
        fig.savefig(image_path)
    finally:
        # Release the figure; otherwise one figure leaks per request.
        plt.close(fig)

    return df.head(), gr.update(visible=True), image_path, gr.update(visible=True), gr.update(visible=True)
def _on_upload(file):
    """Run ``process_file`` and append an update that reveals the submit button.

    The button starts hidden and its only other visibility update comes from
    its own click handler, so without this it could never be pressed.
    """
    return (*process_file(file), gr.update(visible=True))


with gr.Blocks() as demo:
    with gr.Row():
        # Left column: upload and plot-configuration controls. Everything but
        # the file picker starts hidden.
        with gr.Column():
            # Extensions need a leading dot to be treated as file extensions.
            file_input = gr.File(label="上传表格文件(支持CSV、XLS、XLSX等格式)", file_types=[".csv", ".xls", ".xlsx"])
            col1_dropdown = gr.Dropdown(label="请选择特征列", visible=False)
            col2_dropdown = gr.Dropdown(label="选择标签列", visible=False)
            choice_radio = gr.Radio(["是", "否"], label="特征列是否为连续值", value="否", visible=False)
            slider = gr.Slider(minimum=3, maximum=7, step=1, label="选择将特征列分组的分组数", visible=False, value=4)
            submit_button = gr.Button("查看结果", visible=False)
        # Right column: data preview and the generated chart.
        with gr.Column():
            df_display = gr.Dataframe(visible=False)
            output_image = gr.Image(visible=False)

    # After an upload: fill the dropdowns, show the radio, show and fill the
    # preview table (df_display is listed twice: once for the visibility
    # update, once for the data), and reveal the submit button.
    file_input.upload(
        _on_upload,
        inputs=file_input,
        outputs=[col1_dropdown, col2_dropdown, choice_radio, df_display, df_display, submit_button],
    )
    # Toggle the bin-count slider when the radio selection changes.
    choice_radio.change(update_slider, inputs=choice_radio, outputs=slider)
    # Build the chart on click; df_display and output_image are each listed
    # twice to receive both a value and a visibility update.
    submit_button.click(
        generate_output,
        inputs=[file_input, col1_dropdown, col2_dropdown, choice_radio, slider],
        outputs=[df_display, df_display, output_image, output_image, submit_button],
    )

demo.launch(share=True)