# Spaces: Sleeping  (page header captured along with the source; not part of the program)
# %% | |
import io | |
import uuid | |
from dataclasses import dataclass | |
from typing import Optional | |
import matplotlib.pylab as plt | |
import numpy as np | |
import pandas as pd | |
import solara | |
import solara.lab | |
from matplotlib.figure import Figure | |
from scipy import stats | |
from solara.components.file_drop import FileInfo | |
def make_cdf_figure(
    values_left: np.ndarray, values_right: np.ndarray, stat_loc: float
) -> Figure:
    """Plot the empirical CDFs of two samples on one set of axes.

    Args:
        values_left: Sample drawn with the legend label "left".
        values_right: Sample drawn with the legend label "right".
        stat_loc: x position at which a grey vertical line is drawn.

    Returns:
        A new 5x5 inch ``Figure`` holding the plot.
    """
    figure = Figure(figsize=(5, 5))
    axes = figure.subplots()

    # Vertical reference line at the requested location.
    axes.axvline(stat_loc, color="grey")

    # One empirical CDF per sample, left first so it gets the first colour.
    for sample, name in ((values_left, "left"), (values_right, "right")):
        axes.ecdf(sample, label=name)

    axes.legend()
    axes.set(xlabel="Value", ylabel="CDF")
    return figure
def make_pdf_figure(
    values_left: np.ndarray, values_right: np.ndarray, stat_loc: float
) -> Figure:
    """Plot density-normalised histograms of two samples on one set of axes.

    Args:
        values_left: Sample drawn with the legend label "left".
        values_right: Sample drawn with the legend label "right".
        stat_loc: x position at which a grey vertical line is drawn.

    Returns:
        A new 5x5 inch ``Figure`` holding the plot. The figure is only built
        here; displaying it is left to the caller.
    """
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()
    ax.axvline(stat_loc, color="grey")
    # Empirical densities: step histograms normalised to unit area, with the
    # bin count chosen by the "fd" (Freedman-Diaconis) rule.
    ax.hist(values_left, bins="fd", density=True, histtype="step", label="left")
    ax.hist(values_right, bins="fd", density=True, histtype="step", label="right")
    ax.legend()
    # No plt.show() here: the Figure is created directly (not through pyplot),
    # pyplot.show() does not take a figure argument, and this function's job is
    # to return the figure, not to display it.
    ax.set_xlabel("Value")
    ax.set_ylabel("PDF")
    return fig
# %% | |
def dropna(values: np.ndarray) -> np.ndarray:
    """Return the entries of ``values`` that are not NaN, in their original order."""
    keep = np.logical_not(np.isnan(values))
    return values[keep]
@solara.component
def KSTestResult(values_left, values_right):
    """Render the two-sample Kolmogorov-Smirnov test for two samples.

    NaN entries are dropped from both samples first. The card then shows the
    CDF and PDF figures side by side, followed by the test statistic, the
    p-value and the statistic location reported by ``scipy.stats.ks_2samp``.

    Args:
        values_left: NumPy array holding the "left" sample.
        values_right: NumPy array holding the "right" sample.

    If the data is not numeric, or a sample is empty once NaNs are removed,
    a message is rendered instead of the result.
    """
    with solara.Card("Kolmogorov-Smirnov Test"):
        try:
            values_left = dropna(values_left)
            values_right = dropna(values_right)
        except TypeError:
            # np.isnan rejects non-numeric (e.g. string/object) arrays.
            solara.Error("The selected columns must contain numeric data.")
            return

        if values_left.size == 0 or values_right.size == 0:
            # ks_2samp cannot be evaluated on an empty sample.
            solara.Warning(
                "Both selected columns need at least one non-NaN value."
            )
            return

        kstat = stats.ks_2samp(values_left, values_right)
        fig_cdf = make_cdf_figure(
            values_left, values_right, kstat.statistic_location
        )
        fig_pdf = make_pdf_figure(
            values_left, values_right, kstat.statistic_location
        )

        with solara.Columns():
            solara.FigureMatplotlib(fig_cdf)
            solara.FigureMatplotlib(fig_pdf)
        solara.Markdown("# Test Result")
        solara.Info(
            f"statistic: {kstat.statistic:.3g}",
        )
        solara.Info(
            f"p-value: {kstat.pvalue:.3g}",
        )
        solara.Info(
            f"location: {kstat.statistic_location:.3g}",
        )
@dataclass
class Selection:
    """A (file, column) choice identifying one sample in ``data_store``.

    This is a dataclass so that instances can be built with keyword arguments
    and updated field-by-field (``selection.update(file=..., column=...)``).
    ``is_set``, ``columns`` and ``array`` are properties because the rest of
    the module reads them as plain attributes.
    """

    # Name of the chosen uploaded file; used as a key into ``data_store.value``.
    file: Optional[str] = None
    # Name of the chosen column inside that file's DataFrame.
    column: Optional[str] = None

    @property
    def is_set(self) -> bool:
        """Whether both a file and a column have been chosen."""
        return self.file is not None and self.column is not None

    @property
    def columns(self) -> list[str]:
        """Column names of the chosen file, or ``[]`` if none is available."""
        if self.file is None:
            return []
        # The selection can outlive the upload it refers to (for example when
        # a new set of files is dropped), so do not assume the key exists.
        frame = data_store.value.get(self.file)
        if frame is None:
            return []
        return list(frame.columns)

    @property
    def array(self) -> np.ndarray:
        """Values of the chosen column as a NumPy array.

        Returns an empty array when the selection is incomplete or no longer
        matches the data currently held in ``data_store``.
        """
        if not self.is_set:
            return np.array([])
        frame = data_store.value.get(self.file)
        if frame is None or self.column not in frame.columns:
            return np.array([])
        return frame[self.column].to_numpy()
def all_set(selections: list[Selection]) -> bool:
    """Return True when every selection's ``is_set`` is truthy (True if empty)."""
    for candidate in selections:
        if not candidate.is_set:
            return False
    return True
@solara.component
def Selectors(selection: solara.Reactive[Selection]):
    """Render the file and column dropdowns bound to one ``Selection``.

    Args:
        selection: Reactive holding the ``Selection`` to display and update.
            The file dropdown lists the keys of ``data_store``; the column
            dropdown lists the columns of the currently chosen file.
    """
    current = selection.value

    def choose_file(name):
        # Picking a file resets the column, which belonged to the old file.
        selection.update(file=name, column=None)

    def choose_column(name):
        selection.update(column=name)

    solara.Select(
        label="Select file",
        values=list(data_store.value),
        value=current.file,
        on_value=choose_file,
    )
    solara.Select(
        label="Select column",
        values=current.columns,
        value=current.column,
        on_value=choose_column,
    )
# Module-level application state shared by the components in this file.

# Uploaded files as delivered by the file-drop widget.
file_info: solara.Reactive[list[FileInfo]] = solara.reactive([])
# Parsed uploads, keyed by file name (filled by the loader inside KSApp).
data_store: solara.Reactive[dict[str, pd.DataFrame]] = solara.reactive({})
# The file/column choice for each of the two samples being compared.
selection_left: solara.Reactive[Selection] = solara.reactive(Selection())
selection_right: solara.Reactive[Selection] = solara.reactive(Selection())
@solara.component
def KSApp():
    """Top-level component: upload CSV files, pick two columns, show the test.

    The left pane holds the file drop and the two file/column selectors. The
    right pane shows a prompt until both selections are complete, then the
    Kolmogorov-Smirnov result for the two chosen columns.
    """

    def load_data():
        # Parse every uploaded file as CSV and publish the frames by name.
        frames = {
            info["name"]: pd.read_csv(io.BytesIO(info["data"]))
            for info in file_info.value
        }
        data_store.set(frames)

    # Re-parse only when the set of uploaded files changes.
    solara.use_memo(load_data, dependencies=[file_info.value])

    # TODO: a "Clear" button was sketched here but left disabled. Resetting
    # presumably also needs the file-drop widget to be re-keyed so that it
    # forgets its file list - confirm before re-enabling.

    with solara.ColumnsResponsive([3, 9]):
        with solara.Card("Input"):
            solara.FileDropMultiple(
                label="Upload CSV files",
                on_file=file_info.set,
                lazy=False,
            )
            solara.Text("Select left:")
            Selectors(selection_left)
            solara.Text("Select right:")
            Selectors(selection_right)

        if not all_set([selection_left.value, selection_right.value]):
            with solara.Card():
                solara.Text("Please upload data and select both files and columns")
        else:
            KSTestResult(selection_left.value.array, selection_right.value.array)


# Module-level root element for the app.
page = KSApp()