File size: 4,699 Bytes
57e7263
65f466e
d9b2d85
65f466e
6b4bacb
 
 
 
d9b2d85
 
 
 
 
fb02739
6b4bacb
 
57e7263
6b4bacb
 
 
 
 
 
 
 
57e7263
6b4bacb
 
57e7263
d0cd62f
6b4bacb
 
d0cd62f
a523421
 
6b4bacb
57e7263
d0cd62f
 
 
 
57e7263
 
 
 
6b4bacb
2cf0c0d
 
 
 
 
 
 
6b4bacb
 
65f466e
6b4bacb
 
 
1e62ed7
 
6b4bacb
 
 
57e7263
6b4bacb
 
 
89a14c6
d0cd62f
 
6b4bacb
3ef953e
6b4bacb
 
 
 
 
 
57e7263
 
6b4bacb
65f466e
a523421
6b4bacb
57e7263
0304d5e
08cb4ba
6b4bacb
5e9f17d
 
a523421
5e9f17d
 
8790aea
08cb4ba
 
65f466e
d0cd62f
 
57e7263
d0cd62f
57e7263
d0cd62f
 
 
 
57e7263
65f466e
57e7263
a523421
57e7263
6b4bacb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr
import pandas as pd
import matplotlib.font_manager as font_manager
import matplotlib.pyplot as plt
import warnings

# Suppress every UserWarning for the lifetime of the process.
warnings.filterwarnings('ignore', category=UserWarning)

# Location of the SimHei font file; replace with the actual font file path.
font_path = './SimHei.ttf'

# Register the font file with matplotlib, then make it the default family.
# 'SimHei' must match the family name of the registered font. Disabling
# 'axes.unicode_minus' makes minus signs render as a plain ASCII hyphen.
font_manager.fontManager.addfont(font_path)
plt.rcParams.update({
    'font.family': 'SimHei',
    'axes.unicode_minus': False,
})

def process_file(file):
    """Load the uploaded table and reveal the column-selection controls.

    Args:
        file: Uploaded file object; only its ``name`` attribute (used as the
            path handed to pandas) is read.

    Returns:
        A 5-tuple, in the order expected by the upload event's outputs:
        an update for the feature dropdown (choices = every column except
        the last, made visible), an update for the label dropdown (the last
        column, preselected, made visible), a visibility update, the first
        five rows of the table, and another visibility update.

    Raises:
        gr.Error: If the file extension is not .csv, .xlsx or .xls.
    """
    path = file.name
    # Compare case-insensitively so names such as "DATA.CSV" are accepted.
    lowered = path.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith(('.xlsx', '.xls')):
        df = pd.read_excel(path)
    else:
        # A lone string return value cannot be mapped onto the five outputs
        # wired to this handler; raising gr.Error reports the problem to the
        # user instead.
        raise gr.Error("不支持的数据文件格式。")

    columns = df.columns.tolist()
    # By convention of this app the last column is the label and all
    # preceding columns are candidate features.
    feature_columns = columns[:-1]
    last_column = [columns[-1]]
    return (
        gr.update(choices=feature_columns, visible=True),
        gr.update(choices=last_column, value=last_column[-1], visible=True),
        gr.update(visible=True),
        df.head(),
        gr.update(visible=True),
    )

def update_choice_radio(choice_feature_column):
    """Relabel and show the "is this column continuous?" radio control.

    Args:
        choice_feature_column: The currently selected feature column label,
            or ``None`` when nothing is selected.

    Returns:
        A Gradio update that shows the radio with a label naming the selected
        column, or hides it when no column is selected.
    """
    if choice_feature_column is None:
        # Nothing selected: there is no column to ask about.
        return gr.update(visible=False)
    # An f-string (rather than ``+``) keeps this working for non-string
    # column labels, e.g. integer headers read from a spreadsheet.
    return gr.update(label=f"“{choice_feature_column}”是否为连续值", visible=True)

def update_slider(choice):
    """Return an update that shows the slider only when ``choice`` is "是"."""
    show_slider = choice == "是"
    return gr.update(visible=show_slider)

def generate_output(file, column1, column2, choice, bins):
    """Plot the distribution of the label column across a feature column.

    Args:
        file: Uploaded file object; only its ``name`` attribute (used as the
            path handed to pandas) is read.
        column1: Label of the feature column (x axis).
        column2: Label of the label column (one bar series per value).
        choice: "是" to quantile-bin ``column1`` before counting; any other
            value uses the column's raw values as groups.
        bins: Number of quantile bins; also used to scale the figure width.

    Returns:
        A 4-tuple, in the order expected by the click event's outputs: the
        first five rows of the table, a visibility update, the path of the
        saved chart image, and another visibility update.

    Raises:
        gr.Error: If a column has not been selected or the file extension is
            not .csv, .xlsx or .xls.
    """
    if column1 is None or column2 is None:
        # Indexing the frame with None would surface as an opaque KeyError.
        raise gr.Error("请先选择特征列和标签列。")

    path = file.name
    # Compare case-insensitively so names such as "DATA.CSV" are accepted.
    lowered = path.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith(('.xlsx', '.xls')):
        df = pd.read_excel(path)
    else:
        # A lone string return value cannot be mapped onto the four outputs
        # wired to this handler; raise so the user sees the message.
        raise gr.Error("不支持的数据文件格式。")

    data_x = df[column1]
    data_y = df[column2]

    if choice == "是":
        # Continuous feature: split into quantile bins and label each bin
        # with its interval bounds.
        data_x = pd.qcut(data_x, q=bins, duplicates='drop')
        data_x = data_x.apply(lambda x: f'{x.left:.2f} - {x.right:.2f}')

    # Count how often each label value occurs within each feature group.
    counts = pd.crosstab(data_x, data_y)

    # Draw on explicitly created axes: DataFrame.plot() opens its own
    # default-sized figure when no ``ax`` is given, which would ignore this
    # figsize and leave an unused figure behind.
    fig, ax = plt.subplots(figsize=(bins * 2, 10))
    image_path = 'output.png'
    try:
        counts.plot(kind='bar', ax=ax, rot=15)
        ax.set_xlabel(column1, fontsize=12)
        ax.set_ylabel(column2, fontsize=12)
        # NOTE(review): the title joins the two column names with no
        # separator — confirm whether one was intended.
        ax.set_title(f'{column1}{column2}的关系', fontsize=14)
        fig.savefig(image_path)
    finally:
        # Always release the figure; otherwise every request leaks one.
        plt.close(fig)

    return df.head(), gr.update(visible=True), image_path, gr.update(visible=True)
    
# Build the UI: input controls on the left, table preview and chart on the
# right. Everything except the file picker starts hidden and is revealed by
# the event handlers below.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # File extensions must be given with a leading dot to be treated
            # as extensions by the file picker.
            file_input = gr.File(label="上传表格文件(支持CSV、XLS、XLSX等格式)", file_types=[".csv", ".xls", ".xlsx"])
            col1_dropdown = gr.Dropdown(label="请选择特征列", visible=False)
            col2_dropdown = gr.Dropdown(label="选择标签列", visible=False)
            choice_radio = gr.Radio(["是", "否"], label="特征列是否为连续值", visible=False)
            slider = gr.Slider(minimum=3, maximum=7, step=1, label="选择将特征列分组的分组数", visible=False, value=4)
            submit_button = gr.Button("查看结果", visible=False)
        with gr.Column():
            df_display = gr.Dataframe(visible=False)
            output_image = gr.Image(visible=False)

    # After an upload, process_file fills the dropdowns and shows the table.
    # df_display appears twice because the handler returns its visibility
    # update and its value as separate items.
    file_input.upload(process_file, inputs=file_input, outputs=[col1_dropdown, col2_dropdown, df_display, df_display, submit_button])

    # When the feature column changes, update_choice_radio relabels and
    # shows the "continuous?" radio.
    col1_dropdown.change(update_choice_radio, inputs=col1_dropdown, outputs=choice_radio)

    # When the radio changes, update_slider shows or hides the bin slider.
    choice_radio.change(update_slider, inputs=choice_radio, outputs=slider)

    # On submit, generate_output refreshes the table and renders the chart.
    # df_display and output_image each appear twice because the handler
    # returns value and visibility as separate items.
    submit_button.click(generate_output, inputs=[file_input, col1_dropdown, col2_dropdown, choice_radio, slider], outputs=[df_display, df_display, output_image, output_image])

demo.launch(share=True)