Spaces:

xycold
/

DT_choose_feature

Sleeping

File size: 3,812 Bytes

57e7263
65f466e
 
 
6b4bacb
 
 
 
 
 
 
 
 
d0e80d6
6b4bacb
 
57e7263
6b4bacb
 
 
 
 
 
 
 
57e7263
6b4bacb
 
57e7263
6b4bacb
 
 
08cb4ba
6b4bacb
 
57e7263
 
 
 
 
6b4bacb
57e7263
6b4bacb
 
65f466e
6b4bacb
 
 
 
 
 
 
57e7263
6b4bacb
 
 
 
 
 
 
 
 
 
 
 
 
 
57e7263
 
6b4bacb
65f466e
5e9f17d
6b4bacb
57e7263
08cb4ba
 
6b4bacb
5e9f17d
 
155dc9d
5e9f17d
 
08cb4ba
 
 
65f466e
57e7263
6b4bacb
57e7263
 
 
65f466e
57e7263
5e9f17d
57e7263
6b4bacb

import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt

import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr
import tempfile
import warnings

# Suppress every UserWarning for the whole process.
warnings.filterwarnings(action='ignore', category=UserWarning)

# Fonts used for sans-serif text, tried in order. SimHei stays first so hosts
# that have it render exactly as before; the remaining entries are common CJK
# fonts on Windows, Linux and macOS so the Chinese plot labels still render
# when SimHei is not installed.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
]
# Draw negative signs with the ASCII hyphen-minus instead of U+2212, which
# many CJK fonts lack.
plt.rcParams['axes.unicode_minus'] = False

def process_file(file):
    """Load the uploaded table and build the updates for the selection widgets.

    Args:
        file: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path string.

    Returns:
        A 5-tuple, in this order:
          1. dropdown update offering every column except the last one,
             pre-selecting the first of them and making the dropdown visible;
          2. dropdown update offering only the last column, pre-selected and
             visible;
          3. an update that makes its target component visible;
          4. an update that makes its target component visible;
          5. the first five rows of the table (``df.head()``).

    Raises:
        gr.Error: If the extension is not .csv/.xlsx/.xls, or the table has
            fewer than two columns. Raising (instead of returning a single
            string) keeps the number of returned values consistent with the
            five outputs this handler is wired to.
    """
    path = getattr(file, "name", file)
    # Compare extensions case-insensitively so e.g. "DATA.CSV" is accepted.
    lowered = path.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith(('.xlsx', '.xls')):
        df = pd.read_excel(path)
    else:
        raise gr.Error("不支持的数据文件格式。")

    columns = df.columns.tolist()
    # At least one feature column plus the trailing label column are required;
    # without this check the indexing below fails with a bare IndexError.
    if len(columns) < 2:
        raise gr.Error("数据文件至少需要包含两列（特征列和标签列）。")

    feature_columns = columns[:-1]
    last_column = [columns[-1]]
    return (
        gr.update(choices=feature_columns, value=feature_columns[0], visible=True),
        gr.update(choices=last_column, value=last_column[-1], visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
        df.head(),
    )

def update_slider(choice):
    """Return an update that shows its target only when ``choice`` is "是"."""
    is_continuous = choice == "是"
    return gr.update(visible=is_continuous)

def _read_table(path):
    """Read ``path`` as Excel when it ends in .xlsx/.xls, otherwise as CSV."""
    if path.lower().endswith(('.xlsx', '.xls')):
        return pd.read_excel(path)
    return pd.read_csv(path)


def generate_output(file, column1, column2, choice, bins):
    """Plot how the values of ``column2`` are distributed over ``column1``.

    Args:
        file: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path string.
        column1: Name of the feature column (x axis).
        column2: Name of the label column (one bar series per distinct value).
        choice: "是" to treat ``column1`` as continuous and split it into
            quantile bins first; any other value uses the raw values.
        bins: Number of quantile bins; also scales the figure width.

    Returns:
        A 5-tuple, in this order: the first five rows of the table, a
        make-visible update, the path of the saved PNG, and two more
        make-visible updates.
    """
    df = _read_table(getattr(file, "name", file))
    data_x = df[column1]
    data_y = df[column2]

    # A float such as 4.0 would be read by pd.qcut as a list of quantiles
    # rather than a bin count, so force an integer.
    bins = int(bins)

    if choice == "是":
        # Continuous feature: group it into quantile bins; duplicate bin edges
        # are dropped, so fewer than `bins` groups may result.
        data_x = pd.qcut(data_x, q=bins, duplicates='drop')

    # Count the rows for every (feature value or bin, label value) pair.
    counts = pd.crosstab(data_x, data_y)

    # Create the figure first and draw into its axes. Calling plt.figure()
    # after plotting would open a new, empty figure and save that instead.
    fig, ax = plt.subplots(figsize=(bins * 2, 6))
    try:
        # rot=0 keeps the x tick labels horizontal.
        counts.plot(kind='bar', ax=ax, rot=0)
        ax.set_xlabel(column1, fontsize=12)
        ax.set_ylabel(column2, fontsize=12)
        ax.set_title(f'{column1}与{column2}的关系', fontsize=14)

        # Use a unique file per call so simultaneous requests do not overwrite
        # each other's image. delete=False: the file must outlive this
        # function so the caller can read it; it is not removed here.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            image_path = tmp.name
        fig.savefig(image_path)
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)

    return df.head(), gr.update(visible=True), image_path, gr.update(visible=True), gr.update(visible=True)
    
# UI layout: input controls in the left column, results in the right column.
# Everything except the file picker starts hidden and is revealed by the
# event handlers below.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Extensions in file_types need a leading dot to be treated as
            # file extensions by Gradio.
            file_input = gr.File(label="上传表格文件（支持CSV、XLS、XLSX等格式）", file_types=[".csv", ".xls", ".xlsx"])
            col1_dropdown = gr.Dropdown(label="请选择特征列", visible=False)
            col2_dropdown = gr.Dropdown(label="选择标签列", visible=False)
            choice_radio = gr.Radio(["是", "否"], label="特征列是否为连续值", value="否", visible=False)
            slider = gr.Slider(minimum=3, maximum=7, step=1, label="选择将特征列分组的分组数", visible=False, value=4)
            submit_button = gr.Button("查看结果", visible=False)
        with gr.Column():
            df_display = gr.Dataframe(visible=False)
            output_image = gr.Image(visible=False)

    # After an upload, process_file fills the dropdowns and shows the preview.
    # df_display is listed twice on purpose: once for the visibility update
    # and once for the data itself.
    file_input.upload(process_file, inputs=file_input, outputs=[col1_dropdown, col2_dropdown, choice_radio, df_display, df_display])

    # process_file's outputs do not include the submit button, so reveal it
    # with a second listener; otherwise the button stays hidden forever and
    # generate_output can never be triggered.
    file_input.upload(lambda: gr.update(visible=True), inputs=None, outputs=submit_button)

    # Show or hide the bin-count slider when the continuous/discrete choice changes.
    choice_radio.change(update_slider, inputs=choice_radio, outputs=slider)

    # Clicking the button renders the chart; df_display and output_image are
    # each listed twice (value + visibility), matching generate_output's
    # five return values.
    submit_button.click(generate_output, inputs=[file_input, col1_dropdown, col2_dropdown, choice_radio, slider], outputs=[df_display, df_display, output_image, output_image, submit_button])

demo.launch(share=True)