Spaces:
Runtime error
Runtime error
File size: 4,095 Bytes
6486b0c b724a00 929bdc6 6486b0c b724a00 c8daf59 b724a00 929bdc6 7e1513f 929bdc6 7e1513f 929bdc6 b724a00 a6a064f b724a00 929bdc6 9df2795 929bdc6 9df2795 929bdc6 b724a00 9df2795 929bdc6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
#######
# Data loading
#######
# Read the Gapminder table once; the helpers and widgets below all work on
# this module-level frame.
df = pd.read_csv("gapminder.csv")

# Columns offered as plottable measures, and columns that identify a row.
metrics = ["lifeExp", "pop", "gdpPercap"]
dimension = ["country", "continent", "year"]

# (earliest, latest) year present in the data, converted to built-in ints.
year_values = tuple(int(bound) for bound in df["year"].agg(["min", "max"]))
#######
# Helper functions
#######
def _selection_mask(column, selection):
    """Return a boolean mask over ``column`` for one filter argument.

    ``"All"`` disables the filter, any other string is matched exactly, and
    a list-like keeps the rows whose value is one of its members.
    """
    if isinstance(selection, str):
        if selection == "All":
            # Series.isin() rejects a bare string with TypeError, so the
            # sentinel has to be answered here instead of being passed on.
            return pd.Series(True, index=column.index)
        return column == selection
    return column.isin(selection)


def get_filtered_data(
    continents="All",
    countries="All",
    min_year=year_values[0],
    max_year=year_values[1],
):
    """Return the rows of the module-level ``df`` that pass every filter.

    Args:
        continents: ``"All"`` for no continent filter, a single continent
            name, or a list-like of continent names.
        countries: ``"All"`` for no country filter, a single country name,
            or a list-like of country names.
        min_year: First year to keep (inclusive).
        max_year: Last year to keep (inclusive).

    Returns:
        The matching subset of ``df``. An empty list-like for either
        selection matches no rows, so the result is empty.
    """
    years = df["year"]
    mask = (years >= min_year) & (years <= max_year)
    mask &= _selection_mask(df["continent"], continents)
    mask &= _selection_mask(df["country"], countries)
    return df[mask]
def box_plot(df, x, y):
    """Build a box plot of ``y`` grouped and coloured by ``x``.

    Every observation is drawn alongside its box (``points="all"``). The
    columns listed in the module-level ``dimension`` plus ``x`` are handed
    to Plotly Express as hover data.
    """
    hover_columns = df[dimension + [x]]
    return px.box(
        df,
        x=x,
        y=y,
        color=x,
        points="all",
        hover_data=hover_columns,
    )
def scatter_plot(df, x, y, hue):
    """Build a scatter plot of ``y`` against ``x``.

    The ``hue`` column drives both the marker colour and the marker symbol.
    """
    grouping = {"color": hue, "symbol": hue}
    return px.scatter(df, x=x, y=y, **grouping)
def line_plot(df, y_axis, label, highlighted):
    """Draw one line per ``label`` value over the years, emphasising one.

    Args:
        df: Frame with ``year``, ``label`` and ``y_axis`` columns.
        y_axis: Name of the column plotted on the y axis.
        label: Column whose distinct values each get their own line. When
            it is ``"continent"`` the frame is first aggregated per
            continent and year (mean ``lifeExp``, summed ``pop``, mean
            ``gdpPercap``).
        highlighted: The ``label`` value drawn as a thick orange line; all
            other values are drawn as thin gray lines.

    Returns:
        A ``go.Figure`` with the legend hidden.
    """
    if label == "continent":
        aggregations = {"lifeExp": "mean", "pop": "sum", "gdpPercap": "mean"}
        df = df.groupby(["continent", "year"]).agg(aggregations).reset_index()

    def build_trace(name, color, width):
        # One line for a single label value, with a fully custom hover text.
        rows = df[df[label] == name]
        years, values = rows["year"], rows[y_axis]
        hover = [
            f"{label}: {name}<br>year: {year}<br>{y_axis}: {value}"
            for year, value in zip(years, values)
        ]
        return go.Scatter(
            x=years,
            y=values,
            hovertext=hover,
            hoverinfo="text",
            mode="lines",
            line={"color": color, "width": width},
        )

    figure = go.Figure()
    background = [name for name in df[label].unique() if name != highlighted]
    for name in background:
        figure.add_trace(build_trace(name, "gray", 1))
    # Added last so the highlighted line is drawn after the gray ones.
    figure.add_trace(build_trace(highlighted, "orange", 10))
    figure.update_layout(showlegend=False)
    return figure
#######
# Streamlit app code
#######
st.title('[Gapminder] Exploratory Data Analysis')

# --- Table: filter the raw rows by continent, country and year range ---
st.markdown("## Gapminder Table")
selected_continents = st.multiselect(
    "Select Continents:", df["continent"].unique(), key="table_continent"
)
# Only countries belonging to the chosen continents are offered.
selected_countries = st.multiselect(
    "Select Countries:",
    df.loc[df["continent"].isin(selected_continents), "country"].unique(),
    key="table_country",
)
min_year, max_year = st.slider(
    "Select Year:", year_values[0], year_values[1], year_values, key="table_year"
)
st.dataframe(
    get_filtered_data(selected_continents, selected_countries, min_year, max_year)
)

# --- Box plot: one metric split by one dimension ---
st.markdown("## Gapminder Boxplot")
col1, col2 = st.columns(2)
with col1:
    # Index 1 preselects "continent" from the dimension list.
    x = st.selectbox("Select x Axis", dimension, 1, key="boxplot_x")
with col2:
    y = st.selectbox("Select y Axis", metrics, key="boxplot_y")
st.plotly_chart(box_plot(df, x, y))

# --- Line plot: one metric over time, with one line emphasised ---
st.markdown('## Gapminder Lineplot')
col1, col2, col3 = st.columns(3)
# The third column is filled first because the options of the highlight
# selector depend on the chosen label.
with col3:
    label = st.radio("Select label", ["country", "continent"], key="lineplot_label")
with col1:
    highlighted = st.selectbox(
        "Select value to highlight", df[label].unique(), key="lineplot_highlighting"
    )
with col2:
    # This widget picks the metric on the y axis, so it is labelled as such.
    y = st.selectbox("Select y Axis", metrics, key="lineplot_y")
st.plotly_chart(line_plot(df, y, label, highlighted))

# --- Scatter plot: two metrics against each other, coloured by a dimension ---
st.markdown('## Gapminder Scatterplot')
col1, col2, col3 = st.columns(3)
with col1:
    x = st.selectbox("Select x Axis", metrics, key="scatterplot_x")
with col2:
    y = st.selectbox("Select y Axis", metrics, key="scatterplot_y")
with col3:
    hue = st.radio("Select hue", ["country", "continent"], key="scatterplot_hue")
st.plotly_chart(scatter_plot(df, x, y, hue))
|