# %%
import io
import uuid
from dataclasses import dataclass
from typing import Optional

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import solara
import solara.lab
from matplotlib.figure import Figure
from scipy import stats
from solara.components.file_drop import FileInfo


def make_cdf_figure(
    values_left: np.ndarray, values_right: np.ndarray, stat_loc: float
) -> Figure:
    """Plot the empirical CDFs of both samples.

    Args:
        values_left: Sample drawn with the label "left".
        values_right: Sample drawn with the label "right".
        stat_loc: x position marked with a grey vertical line.

    Returns:
        A new 5x5 inch figure that is not registered with pyplot.
    """
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()
    ax.axvline(stat_loc, color="grey")
    # Cumulative distributions.
    ax.ecdf(values_left, label="left")
    ax.ecdf(values_right, label="right")
    ax.legend()
    ax.set_xlabel("Value")
    ax.set_ylabel("CDF")
    return fig


def make_pdf_figure(
    values_left: np.ndarray, values_right: np.ndarray, stat_loc: float
) -> Figure:
    """Plot density-normalised step histograms of both samples.

    Args:
        values_left: Sample drawn with the label "left".
        values_right: Sample drawn with the label "right".
        stat_loc: x position marked with a grey vertical line.

    Returns:
        A new 5x5 inch figure that is not registered with pyplot.
    """
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()
    ax.axvline(stat_loc, color="grey")
    # Density histograms; bins="fd" lets numpy pick the bin width.
    ax.hist(values_left, bins="fd", density=True, histtype="step", label="left")
    ax.hist(values_right, bins="fd", density=True, histtype="step", label="right")
    ax.legend()
    # NOTE: no plt.show() here. The figure is a bare ``Figure`` that pyplot
    # does not manage; it is rendered by ``solara.FigureMatplotlib``.
    ax.set_xlabel("Value")
    ax.set_ylabel("PDF")
    return fig


# %%
def dropna(values: np.ndarray) -> np.ndarray:
    """Return ``values`` without its NaN entries."""
    return values[~np.isnan(values)]


def _is_numeric(values: np.ndarray) -> bool:
    """Return True for bool, integer and float arrays."""
    return values.dtype.kind in "biuf"


@solara.component
def KSTestResult(values_left, values_right):
    """Run a two-sample KS test and render the plots and statistics.

    NaN entries are removed from both samples before testing. A message is
    shown instead of the result when a sample is non-numeric or has no
    values left, because ``np.isnan`` and ``stats.ks_2samp`` would raise.
    """
    if not (_is_numeric(values_left) and _is_numeric(values_right)):
        solara.Error("Both selected columns must contain numeric data")
        return

    values_left = dropna(values_left)
    values_right = dropna(values_right)
    if values_left.size == 0 or values_right.size == 0:
        solara.Warning(
            "Both selected columns must contain at least one non-missing value"
        )
        return

    kstat = stats.ks_2samp(values_left, values_right)
    fig_cdf = make_cdf_figure(values_left, values_right, kstat.statistic_location)
    fig_pdf = make_pdf_figure(values_left, values_right, kstat.statistic_location)

    with solara.Card("Kolmogorov-Smirnov Test"):
        with solara.Columns():
            solara.FigureMatplotlib(fig_cdf)
            solara.FigureMatplotlib(fig_pdf)
        solara.Markdown("# Test Result")
        solara.Info(
            f"statistic: {kstat.statistic:.3g}",
        )
        solara.Info(
            f"p-value: {kstat.pvalue:.3g}",
        )
        solara.Info(
            f"location: {kstat.statistic_location:.3g}",
        )


@dataclass
class Selection:
    """A (file, column) choice that refers into ``data_store``."""

    # Key into ``data_store``; None while nothing is chosen.
    file: Optional[str] = None
    # Column name inside the chosen data frame; None while nothing is chosen.
    column: Optional[str] = None

    @property
    def is_set(self) -> bool:
        """True when both a file and a column have been chosen."""
        return self.file is not None and self.column is not None

    @property
    def columns(self) -> list[str]:
        """Column names of the chosen file, or [] if it is not available."""
        # The store is replaced wholesale on upload, so the chosen file may
        # no longer be present; ``.get`` avoids a KeyError in that case.
        frame = data_store.value.get(self.file) if self.file is not None else None
        if frame is None:
            return []
        return list(frame.columns)

    @property
    def array(self) -> np.ndarray:
        """Values of the chosen column, or an empty array if unavailable."""
        if not self.is_set:
            return np.array([])
        frame = data_store.value.get(self.file)
        if frame is None or self.column not in frame.columns:
            return np.array([])
        return frame[self.column].to_numpy()


def all_set(selections: list[Selection]) -> bool:
    """Return True when every selection has both file and column chosen."""
    return all(s.is_set for s in selections)


@solara.component
def Selectors(selection: solara.Reactive[Selection]):
    """Render a file dropdown and a column dropdown bound to ``selection``."""
    solara.Select(
        label="Select file",
        values=list(data_store.value.keys()),
        value=selection.value.file,
        # Changing the file clears the column, which belonged to the old file.
        on_value=lambda x: selection.update(file=x, column=None),
    )
    solara.Select(
        label="Select_column",
        values=selection.value.columns,
        value=selection.value.column,
        on_value=lambda x: selection.update(column=x),
    )


file_info: solara.Reactive[list[FileInfo]] = solara.reactive([])
data_store = solara.Reactive({})
selection_left = solara.reactive(Selection())
selection_right = solara.reactive(Selection())
# Kept at module level so the value survives re-renders of ``KSApp``;
# creating it inside the component would yield a fresh UUID on every render.
upload_key = solara.reactive(uuid.uuid4())


@solara.component
def KSApp():
    """Top-level page: upload CSV files, pick two columns, show the KS test."""

    def load_data():
        # Parse every uploaded file into a data frame keyed by its file name.
        d = {}
        for f in file_info.value:
            b_io = io.BytesIO(f["data"])
            df = pd.read_csv(b_io)
            d[f["name"]] = df
        data_store.set(d)

    # Re-parse only when the set of uploaded files changes.
    _ = solara.use_memo(load_data, dependencies=[file_info.value])

    def clear_all():
        # Reset the upload widget key, both selections and all loaded data.
        upload_key.set(uuid.uuid4())
        selection_left.set(Selection())
        selection_right.set(Selection())
        file_info.set([])
        data_store.set({})

    with solara.ColumnsResponsive([3, 9]):
        with solara.Card("Input"):
            solara.FileDropMultiple(
                label="Upload CSV files",
                on_file=file_info.set,
                lazy=False,
            )  # .key(upload_key.value.hex)
            solara.Text("Select left:")
            Selectors(selection_left)
            solara.Text("Select right:")
            Selectors(selection_right)
            # solara.Button(label="Clear", on_click=clear_all)

        if not all_set([selection_left.value, selection_right.value]):
            with solara.Card():
                solara.Text("Please upload data and select both files and columns")
        else:
            KSTestResult(selection_left.value.array, selection_right.value.array)


page = KSApp()