"""Helpers for the Major TOM dataset explorer.

Contains the static texts and links shown by the app, a locally cached
loader for the dataset metadata, a thumbnail/band image fetcher and
nearest-sample lookups in easting/northing coordinates.
"""

from io import BytesIO
from pathlib import Path

import holoviews as hv
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
from fsspec.parquet import open_parquet_file
from holoviews import opts
from PIL import Image

MAJOR_TOM_LOGO = "https://cdn-uploads.huggingface.co/production/uploads/6304c06eeb6d777a838eab63/BJKsLwX0GG4W3-gdf40TJ.png"
MAJOR_TOM_PICTURE = (
    "https://upload.wikimedia.org/wikipedia/en/6/6d/Major_tom_space_oddity_video.JPG"
)
MAJOR_TOM_REF_URL = "https://huggingface.co/Major-TOM"
MAJOR_TOM_ARXIV_URL = "https://www.arxiv.org/abs/2402.12095"
PANEL_LOGO = "https://panel.holoviz.org/_static/logo_horizontal_light_theme.png"
PANEL_URL = "https://panel.holoviz.org"
DATASHADER_LOGO = "https://datashader.org/_static/logo_horizontal.svg"
DATASHADER_URL = "https://datashader.org/"

REPOSITORY = "Major-TOM"
DATASETS = ["Core-S2L2A", "Core-S2L1C"]

ESA_EASTING = 250668.73322714816
ESA_NORTHING = 6259216.653115547

# Display label -> column name in the sample parquet files.
META_DATA_COLUMNS = {
    "Coastal aerosol": "B01",
    "Blue": "B02",
    "Green": "B03",
    "Red": "B04",
    "Vegetation Blue": "B05",
    "Vegetation Green": "B06",
    "Vegetation Red": "B07",
    "NIR": "B08",
    "Narrow NIR": "B8A",
    "Water vapour": "B09",
    "SWIR, 1613.7": "B11",
    "SWIR, 2202.4": "B12",
    "Cloud Mask": "cloud_mask",
    "Thumbnail": "thumbnail",
}

# Local cache directory for downloaded metadata, next to this module.
DATA_PATH = Path(__file__).parent / "data"

# Markdown: each heading must sit on its own line, separated by blank lines.
DESCRIPTION = f"""\
## Dataset Explorer

This app provides a way of exploring samples present in the [MajorTOM-Core]({MAJOR_TOM_REF_URL}) dataset. It contains nearly every piece of Earth captured by ESA [Sentinel-2](https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-2) satellite.

[Website]({MAJOR_TOM_REF_URL}), [arXiv Paper]({MAJOR_TOM_ARXIV_URL})

## Instructions

To find a sample, navigate on the map to a place of interest. Click the map to find a dataset sample at the location you clicked.

## Powered by

"""

# One lyric line per source line; only single newlines are used, so the text
# is whitespace-equivalent to the previous single-line form.
MAJOR_TOM_LYRICS = """
Standing there alone, the ship is waiting
All systems are go, are you sure?
Control is not convinced, but the computer
Has the evidence, no need to abort
The countdown starts
Watching in a trance, the crew is certain
Nothing left to chance, all is working
Trying to relax up in the capsule
"Send me up a drink, " jokes **Major Tom**
The count goes on
Four, three, two, one
Earth below us, drifting, falling
Floating weightless, calling, calling home
Second stage is cut, we're now in orbit
Stabilizers up, running perfect
Starting to collect requested data
"What will it affect when all is done?"
Thinks **Major Tom**
Back at ground control, there is a problem
Go to rockets full, not responding
"Hello **Major Tom**, are you receiving?
Turn the thrusters on, we're standing by"
There's no reply
Four, three, two, one
Earth below us, drifting, falling
Floating weightless, calling, calling home
Across the stratosphere a final message
"Give my wife my love, " then nothing more
Far beneath the ship, the world is mourning
They don't realize he's alive
No one understands, but **Major Tom** sees
"Now the light commands, this is my home
I'm coming home"
Earth below us, drifting, falling
Floating weightless, coming home
Earth below us, drifting, falling
Floating weightless, coming home
Earth below us, drifting, falling
Floating weightless, coming, coming home
Home
Home
Home
Home
Home
"""

hv.extension("bokeh")

opts.defaults(
    # opts.Curve(xaxis=None, yaxis=None, show_grid=False, show_frame=False,
    #            color='orangered', framewise=True, width=100),
    opts.HLine(color="gray", line_width=1),
    # opts.Layout(shared_axes=False),
    opts.VLine(color="gray", line_width=1),
)


def _meta_data_url(dataset="Core-S2L2A", repository=REPOSITORY):
    """Return the Hugging Face URL of the metadata parquet file of *dataset*."""
    return f"https://huggingface.co/datasets/{repository}/{dataset}/resolve/main/metadata.parquet"


def _meta_data_path(dataset="Core-S2L2A", repository=REPOSITORY):
    """Return the local cache path for the metadata of *dataset*.

    Creates ``DATA_PATH`` if it does not exist yet.
    """
    DATA_PATH.mkdir(parents=True, exist_ok=True)
    if repository == REPOSITORY:
        # Keep the historical file name so existing caches stay valid.
        return DATA_PATH / f"{dataset}_metadata.parquet"
    # Separate file per repository so caches of different repositories
    # cannot shadow each other.
    safe_repository = repository.replace("/", "_")
    return DATA_PATH / f"{safe_repository}_{dataset}_metadata.parquet"


def get_meta_data(dataset="Core-S2L2A", repository=REPOSITORY) -> pd.DataFrame:
    """Return the metadata of *dataset* with projected centre coordinates.

    The metadata parquet file is downloaded on first use and stored under
    ``DATA_PATH``; later calls read the local copy.

    Args:
        dataset: Name of the dataset inside the repository.
        repository: Hugging Face repository (organisation) hosting the dataset.

    Returns:
        The metadata with two added columns, ``centre_easting`` and
        ``centre_northing``, computed from ``centre_lon`` / ``centre_lat``.
    """
    # Forward `repository`: it used to be dropped, so a non-default
    # repository silently returned the default repository's data.
    path = _meta_data_path(dataset=dataset, repository=repository)
    if not path.exists():
        remote = pd.read_parquet(_meta_data_url(dataset=dataset, repository=repository))
        remote.to_parquet(path)

    data = pd.read_parquet(path)
    data["centre_easting"], data["centre_northing"] = (
        hv.util.transform.lon_lat_to_easting_northing(
            data["centre_lon"], data["centre_lat"]
        )
    )
    return data


def get_image(row, column="thumbnail"):
    """Fetch the image stored in *column* for the sample described by *row*.

    Args:
        row: Mapping-like object providing ``parquet_url`` (location of the
            parquet file) and ``parquet_row`` (index of the row group to read).
        column: Name of the parquet column holding the encoded image bytes.

    Returns:
        The ``PIL.Image`` opened from the first value of *column* in the
        selected row group.
    """
    parquet_url = row["parquet_url"]
    parquet_row = row["parquet_row"]
    print(parquet_url, parquet_row, column)

    # Only the requested column is fetched from the remote file.
    with open_parquet_file(parquet_url, columns=[column]) as f, pq.ParquetFile(f) as pf:
        row_group = pf.read_row_group(parquet_row, columns=[column])

    # NOTE(review): presumably each row group holds exactly one sample, hence
    # index 0 - confirm against the dataset layout.
    stream = BytesIO(row_group[column][0].as_py())
    return Image.open(stream)


def euclidean_distance(x, y, target_x, target_y):
    """Return the Euclidean distance between ``(x, y)`` and the target point.

    Works element-wise when *x* and *y* are arrays or pandas Series.
    """
    return np.sqrt((x - target_x) ** 2 + (y - target_y) ** 2)


def _distance_to_target(df: pd.DataFrame, target_easting, target_northing):
    """Return the distance of every sample centre in *df* to the target."""
    return euclidean_distance(
        df["centre_easting"], df["centre_northing"], target_easting, target_northing
    )


def get_closest_row(df: pd.DataFrame, target_easting, target_northing) -> pd.Series:
    """Return the row of *df* whose centre is closest to the target.

    If several rows share the minimal distance, the first one is returned.
    """
    distance = _distance_to_target(df, target_easting, target_northing)
    return df.loc[distance.idxmin()]


def get_closest_rows(df: pd.DataFrame, target_easting, target_northing) -> pd.DataFrame:
    """Return all rows of *df* whose centre has the minimal distance to the target."""
    distance = _distance_to_target(df, target_easting, target_northing)
    return df[distance == distance.min()]