major-tom-explorer / utils.py
MarcSkovMadsen's picture
update
64df5de
raw
history blame
3.5 kB
from io import BytesIO
from pathlib import Path
import holoviews as hv
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
from fsspec.parquet import open_parquet_file
from holoviews import opts
from PIL import Image
# Image and link constants for the projects referenced by the app
# (Major TOM, Panel, Datashader).
MAJOR_TOM_LOGO = "https://cdn-uploads.huggingface.co/production/uploads/6304c06eeb6d777a838eab63/BJKsLwX0GG4W3-gdf40TJ.png"
MAJOR_TOM_PICTURE = (
"https://upload.wikimedia.org/wikipedia/en/6/6d/Major_tom_space_oddity_video.JPG"
)
MAJOR_TOM_REF_URL = "https://huggingface.co/Major-TOM"
PANEL_LOGO = "https://panel.holoviz.org/_static/logo_horizontal_light_theme.png"
PANEL_URL = "https://panel.holoviz.org"
DATASHADER_LOGO = "https://datashader.org/_static/logo_horizontal.svg"
DATASHADER_URL = "https://datashader.org/"
# Default Hugging Face namespace; it is the `{repository}` segment of the
# metadata URL built by `_meta_data_url`.
REPOSITORY = "Major-TOM"
# Dataset names; the first one is also the default `dataset` argument of the
# metadata helpers in this module.
DATASETS = ["Core-S2L2A", "Core-S2L1C"]
# Directory next to this module where `_meta_data_path` places the cached
# metadata parquet files.
DATA_PATH = Path(__file__).parent / "data"
# Markdown introduction text. It ends with an empty "Powered by" heading,
# presumably completed by the caller with the logos above -- TODO confirm.
DESCRIPTION = f"""\
## Dataset Explorer
This app provides a way of exploring samples present in the [MajorTOM-Core]({MAJOR_TOM_REF_URL}) dataset. It contains nearly every piece of Earth captured by ESA [Sentinel-2](https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-2) satellite.
## Instructions
To find a sample, navigate on the map to a place of interest. Click the map to find a dataset sample at the location you clicked.
## Powered by
"""
# Import-time side effect: select the Bokeh plotting backend for HoloViews.
hv.extension("bokeh")
# Register default styling so that HLine and VLine elements are drawn as
# thin gray lines.
opts.defaults(
# Disabled Curve/Layout defaults, kept for reference:
# opts.Curve(xaxis=None, yaxis=None, show_grid=False, show_frame=False,
# color='orangered', framewise=True, width=100),
opts.HLine(color="gray", line_width=1),
# opts.Layout(shared_axes=False),
opts.VLine(color="gray", line_width=1),
)
def _meta_data_url(dataset="Core-S2L2A", repository=REPOSITORY):
    """Build the Hugging Face URL of a dataset's ``metadata.parquet`` file.

    Args:
        dataset: Name of the dataset inside the repository namespace.
        repository: Hugging Face namespace that hosts the dataset.

    Returns:
        The URL as a string.
    """
    url = f"https://huggingface.co/datasets/{repository}/{dataset}/resolve/main/metadata.parquet"
    return url
def _meta_data_path(dataset="Core-S2L2A", repository=REPOSITORY):
    """Return the local cache path for a dataset's metadata file.

    The cache directory ``DATA_PATH`` is created if it does not exist yet.

    Args:
        dataset: Dataset name; it prefixes the cache file name.
        repository: Accepted for symmetry with ``_meta_data_url``; it does
            not influence the returned path.

    Returns:
        ``DATA_PATH / "<dataset>_metadata.parquet"``.
    """
    file_name = f"{dataset}_metadata.parquet"
    # Make sure the directory exists before handing the path to a writer.
    DATA_PATH.mkdir(parents=True, exist_ok=True)
    return DATA_PATH / file_name
def get_meta_data(dataset="Core-S2L2A", repository=REPOSITORY):
    """Load a dataset's metadata table, using a local parquet file as cache.

    If the cache file returned by ``_meta_data_path`` exists it is read
    directly. Otherwise the table is downloaded from the URL returned by
    ``_meta_data_url``, written to the cache file, and used as is.

    Two columns, ``centre_easting`` and ``centre_northing``, are added to
    the table by converting ``centre_lon`` / ``centre_lat`` with
    ``hv.util.transform.lon_lat_to_easting_northing``.

    Args:
        dataset: Name of the dataset whose metadata should be loaded.
        repository: Hugging Face namespace that hosts the dataset.

    Returns:
        A ``pandas.DataFrame`` with the metadata and the two added columns.
    """
    # Forward `repository` to both helpers; previously it was dropped, so a
    # non-default repository silently resolved to the default one.
    path = _meta_data_path(dataset=dataset, repository=repository)
    if path.exists():
        data = pd.read_parquet(path)
    else:
        url = _meta_data_url(dataset=dataset, repository=repository)
        data = pd.read_parquet(url)
        # Populate the cache; the downloaded frame is reused rather than
        # being read back from disk a second time.
        data.to_parquet(path)
    data["centre_easting"], data["centre_northing"] = (
        hv.util.transform.lon_lat_to_easting_northing(
            data["centre_lon"], data["centre_lat"]
        )
    )
    return data
def get_image(row):
    """Fetch the thumbnail image referenced by a metadata row.

    Args:
        row: Mapping-like object with a ``"parquet_url"`` entry (location of
            the parquet file) and a ``"parquet_row"`` entry (index of the row
            group to read from that file).

    Returns:
        The image opened by ``PIL.Image.open`` from the first ``thumbnail``
        value of the selected row group.
    """
    url, row_group_index = row["parquet_url"], row["parquet_row"]
    print(url)
    print(row_group_index)
    column = "thumbnail"
    with open_parquet_file(url, columns=[column]) as remote_file:
        with pq.ParquetFile(remote_file) as parquet_file:
            table = parquet_file.read_row_group(row_group_index, columns=[column])
            # The thumbnail cell holds the encoded image bytes.
            thumbnail_bytes = table[column][0].as_py()
            image = Image.open(BytesIO(thumbnail_bytes))
    return image
def euclidean_distance(x, y, target_x, target_y):
    """Return the straight-line distance from ``(x, y)`` to the target point.

    The arithmetic is element-wise, so ``x`` and ``y`` may be scalars or
    array-like objects that support subtraction and ``** 2``.
    """
    delta_x = x - target_x
    delta_y = y - target_y
    squared_distance = delta_x ** 2 + delta_y ** 2
    return np.sqrt(squared_distance)
def get_closest_row(df, target_easting, target_northing):
    """Return the row of ``df`` whose centre is nearest to the target point.

    Args:
        df: Table with ``centre_easting`` and ``centre_northing`` columns.
        target_easting: Easting of the point to search around.
        target_northing: Northing of the point to search around.

    Returns:
        The result of ``df.loc`` for the index label with the smallest
        distance.
    """
    distances = euclidean_distance(
        df["centre_easting"],
        df["centre_northing"],
        target_easting,
        target_northing,
    )
    nearest_label = distances.idxmin()
    return df.loc[nearest_label]
def get_closest_rows(df, target_easting, target_northing):
    """Return every row of ``df`` that shares the minimum distance to the target.

    Args:
        df: Table with ``centre_easting`` and ``centre_northing`` columns.
        target_easting: Easting of the point to search around.
        target_northing: Northing of the point to search around.

    Returns:
        The subset of ``df`` whose distance equals the smallest distance.
    """
    distances = euclidean_distance(
        df["centre_easting"],
        df["centre_northing"],
        target_easting,
        target_northing,
    )
    smallest = distances.min()
    is_nearest = distances == smallest
    return df[is_nearest]