Spaces:
Sleeping
Sleeping
File size: 4,755 Bytes
e05e748 12638cb e05e748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
# %%
import io
import uuid
from dataclasses import dataclass
from typing import Optional
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import solara
import solara.lab
from matplotlib.figure import Figure
from scipy import stats
from solara.components.file_drop import FileInfo
def make_cdf_figure(
    values_left: np.ndarray, values_right: np.ndarray, stat_loc: float
) -> Figure:
    """Plot the empirical CDFs of two samples on a shared set of axes.

    Args:
        values_left: Sample drawn with the legend label "left".
        values_right: Sample drawn with the legend label "right".
        stat_loc: x-position marked with a grey vertical line.

    Returns:
        A new 5x5 inch figure containing the plot.
    """
    figure = Figure(figsize=(5, 5))
    axes = figure.subplots()
    # Reference line first, then one empirical CDF curve per sample.
    axes.axvline(stat_loc, color="grey")
    for name, sample in (("left", values_left), ("right", values_right)):
        axes.ecdf(sample, label=name)
    axes.legend()
    axes.set_xlabel("Value")
    axes.set_ylabel("CDF")
    return figure
def make_pdf_figure(
    values_left: np.ndarray, values_right: np.ndarray, stat_loc: float
) -> Figure:
    """Plot density-normalised step histograms of two samples.

    Args:
        values_left: Sample drawn with the legend label "left".
        values_right: Sample drawn with the legend label "right".
        stat_loc: x-position marked with a grey vertical line.

    Returns:
        A new 5x5 inch figure containing the plot.
    """
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()
    ax.axvline(stat_loc, color="grey")
    # Histogram estimates of the probability densities; bin edges are chosen
    # per sample by the "fd" rule.
    ax.hist(values_left, bins="fd", density=True, histtype="step", label="left")
    ax.hist(values_right, bins="fd", density=True, histtype="step", label="right")
    ax.legend()
    # No plt.show() here: the figure is created directly (not through pyplot)
    # and is returned to the caller, which is responsible for rendering it.
    ax.set_xlabel("Value")
    ax.set_ylabel("PDF")
    return fig
# %%
def dropna(values: np.ndarray) -> np.ndarray:
    """Return the entries of ``values`` that are not NaN."""
    keep = np.logical_not(np.isnan(values))
    return values[keep]
@solara.component
def KSTestResult(values_left, values_right):
    """Render the two-sample Kolmogorov-Smirnov test for two samples.

    NaN entries are dropped from both inputs before testing. The card shows
    the CDF and PDF figures side by side, followed by the test statistic,
    the p-value and the location of the statistic.

    Args:
        values_left: Numeric array for the "left" sample.
        values_right: Numeric array for the "right" sample.
    """
    values_left = dropna(values_left)
    values_right = dropna(values_right)

    # ks_2samp raises ValueError on an empty sample (e.g. a column that is
    # entirely NaN); show a message instead of failing the whole component.
    if values_left.size == 0 or values_right.size == 0:
        with solara.Card("Kolmogorov-Smirnov Test"):
            solara.Warning(
                "Both selected columns must contain at least one non-NaN value."
            )
        return

    kstat = stats.ks_2samp(values_left, values_right)
    fig_cdf = make_cdf_figure(values_left, values_right, kstat.statistic_location)
    fig_pdf = make_pdf_figure(values_left, values_right, kstat.statistic_location)
    with solara.Card("Kolmogorov-Smirnov Test"):
        with solara.Columns():
            solara.FigureMatplotlib(fig_cdf)
            solara.FigureMatplotlib(fig_pdf)
        solara.Markdown("# Test Result")
        solara.Info(
            f"statistic: {kstat.statistic:.3g}",
        )
        solara.Info(
            f"p-value: {kstat.pvalue:.3g}",
        )
        solara.Info(
            f"location: {kstat.statistic_location:.3g}",
        )
@dataclass
class Selection:
    """A (file, column) choice that refers into the module-level ``data_store``.

    All lookups tolerate a stale selection: if ``file`` is no longer a key of
    ``data_store.value`` (for example after a new upload replaced the store),
    the selection behaves as unset instead of raising ``KeyError``.
    """

    # Key into ``data_store.value``; None while nothing is chosen.
    file: Optional[str] = None
    # Column name within the chosen table; None while nothing is chosen.
    column: Optional[str] = None

    def _frame(self):
        """Return the stored table for ``file``, or None if it is unavailable."""
        if self.file is None:
            return None
        return data_store.value.get(self.file)

    @property
    def is_set(self) -> bool:
        """True when both file and column are chosen and exist in the store."""
        if self.file is None or self.column is None:
            return False
        frame = self._frame()
        return frame is not None and self.column in frame.columns

    @property
    def columns(self) -> list[str]:
        """Column names of the chosen file; empty if no usable file is chosen."""
        frame = self._frame()
        if frame is None:
            return []
        return list(frame.columns)

    @property
    def array(self) -> np.ndarray:
        """Values of the chosen column as an array; empty when not ``is_set``."""
        if not self.is_set:
            return np.array([])
        return self._frame()[self.column].to_numpy()
def all_set(selections: list[Selection]) -> bool:
    """Return True only if every selection in ``selections`` reports ``is_set``."""
    for selection in selections:
        if not selection.is_set:
            return False
    return True
@solara.component
def Selectors(selection: solara.Reactive[Selection]):
    """Render a file dropdown and a column dropdown bound to ``selection``.

    The file dropdown lists the keys of ``data_store``; the column dropdown
    lists the columns reported by the current selection.
    """
    current = selection.value

    def choose_file(name):
        # Picking a file clears the column choice.
        selection.update(file=name, column=None)

    def choose_column(name):
        selection.update(column=name)

    solara.Select(
        label="Select file",
        values=list(data_store.value),
        value=current.file,
        on_value=choose_file,
    )
    solara.Select(
        label="Select_column",
        values=current.columns,
        value=current.column,
        on_value=choose_column,
    )
# Module-level reactive state shared by the components in this file.
# Files most recently delivered by the upload widget in KSApp.
file_info: solara.Reactive[list[FileInfo]] = solara.reactive([])
# Parsed tables keyed by file name; filled by KSApp's load_data.
data_store = solara.Reactive({})
# Current (file, column) choice for each side of the comparison.
selection_left = solara.reactive(Selection())
selection_right = solara.reactive(Selection())
@solara.component
def KSApp():
    """Top-level component: upload CSV files, pick two columns, show the test.

    The left panel holds the upload widget and the two file/column selectors.
    The right panel shows either a prompt (until both selections are set) or
    the Kolmogorov-Smirnov result for the two selected columns.
    """

    def load_data():
        """Parse every uploaded file as CSV and replace ``data_store``."""
        d = {}
        for f in file_info.value:
            b_io = io.BytesIO(f["data"])
            df = pd.read_csv(b_io)
            d[f["name"]] = df
        data_store.set(d)

    # Re-parse whenever the list of uploaded files changes.
    # NOTE(review): use_memo is used here only for its side effect on
    # data_store; confirm this is intended rather than an effect hook.
    _ = solara.use_memo(load_data, dependencies=[file_info.value])

    # NOTE(review): this creates a fresh reactive on every render and is only
    # used by clear_all, whose button is currently disabled below.
    upload_key = solara.reactive(uuid.uuid4())

    def clear_all():
        """Reset the upload key, both selections and all loaded data."""
        upload_key.set(uuid.uuid4())
        selection_left.set(Selection())
        selection_right.set(Selection())
        file_info.set([])
        data_store.set({})

    with solara.ColumnsResponsive([3, 9]):
        with solara.Card("Input"):
            solara.FileDropMultiple(
                label="Upload CSV files",
                on_file=file_info.set,
                lazy=False,
            )  # .key(upload_key.value.hex)
            solara.Text("Select left:")
            Selectors(selection_left)
            # Caption for the right-hand selection (previously mislabelled "left").
            solara.Text("Select right:")
            Selectors(selection_right)
            # solara.Button(label="Clear", on_click=clear_all)
        if not all_set([selection_left.value, selection_right.value]):
            with solara.Card():
                solara.Text("Please upload data and select both files and columns")
        else:
            KSTestResult(selection_left.value.array, selection_right.value.array)
# Module-level element built from KSApp; presumably the entry point the
# Solara runner looks up by the name `page` (confirm against deployment).
page = KSApp()
|