File size: 4,354 Bytes
1d123d6 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 a4421c2 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 a4421c2 1d123d6 a4421c2 c328472 1d123d6 c328472 1d123d6 a4421c2 185bd85 b89f914 185bd85 b89f914 a4421c2 1d123d6 a4421c2 c328472 1d123d6 a4421c2 2ae53f8 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 2ae53f8 3357a3b 2ae53f8 1d123d6 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 c328472 1d123d6 c328472 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import os
import re
import gradio as gr
from huggingface_hub import get_collection
def extract_collection_id(input_text):
    """Return the collection slug contained in *input_text*, or ``None``.

    Accepts either a full ``https://huggingface.co/collections/<slug>`` URL
    or a bare ``owner/collection-name`` slug.

    Args:
        input_text: Collection URL or slug, possibly padded with whitespace.

    Returns:
        The slug with surrounding whitespace, trailing slashes and any URL
        query string or fragment removed, or ``None`` when the input is
        neither a collection URL nor a well-formed slug.
    """
    candidate = input_text.strip()
    # Stop at "?" / "#" so a URL copied from the browser does not drag its
    # query string or fragment into the slug.
    if url_match := re.match(
        r"https://huggingface\.co/collections/([^?#]+)", candidate
    ):
        # A trailing slash is not part of the slug; an empty remainder means
        # the URL carried no slug at all.
        return url_match[1].rstrip("/") or None
    # fullmatch (rather than a prefix match) rejects trailing junk such as
    # "owner/name extra words"; dots are allowed in both segments.
    return candidate if re.fullmatch(r"[\w.-]+/[\w.-]+", candidate) else None
def load_collection():
    """Resolve the configured collection and list the datasets it contains.

    The collection is read from the ``COLLECTION_SLUG_OR_URL`` environment
    variable, normalised with ``extract_collection_id`` and fetched with
    ``get_collection``.

    Returns:
        A ``(dataset_ids, collection_id)`` tuple: the ``item_id`` of every
        collection item whose ``item_type`` is ``"dataset"``, and the slug
        that was looked up.

    Raises:
        ValueError: If the variable is unset or empty, if its value is not a
            recognised slug or URL, or if the collection has no datasets.
    """
    raw_value = os.getenv("COLLECTION_SLUG_OR_URL")
    if not raw_value:
        raise ValueError("COLLECTION_SLUG_OR_URL environment variable is not set.")

    collection_id = extract_collection_id(raw_value)
    if not collection_id:
        raise ValueError(
            "Invalid collection ID or URL in COLLECTION_SLUG_OR_URL environment variable."
        )

    hub_collection = get_collection(collection_id)
    # Collections can hold several item types; only datasets are kept.
    dataset_ids = [
        entry.item_id
        for entry in hub_collection.items
        if entry.item_type == "dataset"
    ]
    if not dataset_ids:
        raise ValueError("No datasets found in this collection.")
    return dataset_ids, collection_id
def display_dataset(dataset_ids, index):
    """Build the embedded dataset viewer for one dataset of the collection.

    Args:
        dataset_ids: Sequence of dataset repository ids.
        index: Position in *dataset_ids* of the dataset to show.

    Returns:
        A ``gr.HTML`` component wrapping an ``<iframe>`` that points at the
        dataset's ``/embed/viewer`` page on huggingface.co.
    """
    # The markup lines stay unindented so the emitted HTML text is unchanged.
    iframe_markup = f"""<iframe
src="https://huggingface.co/datasets/{dataset_ids[index]}/embed/viewer"
frameborder="0"
width="100%"
height="560px"
></iframe>"""
    return gr.HTML(iframe_markup)
def navigate_dataset(dataset_ids, index, direction):
    """Move the selection by *direction* steps, wrapping around at both ends.

    Args:
        dataset_ids: Non-empty sequence of dataset repository ids.
        index: Index of the currently selected dataset.
        direction: Step to apply, e.g. ``-1`` for previous or ``1`` for next.
            It may arrive as a float (for example from a numeric input
            component), so it is coerced to ``int`` before use.

    Returns:
        A ``(new_index, label)`` tuple, where ``new_index`` is an ``int`` and
        ``label`` reads ``"Dataset <n> of <total>: <dataset id>"``.

    Raises:
        ZeroDivisionError: If *dataset_ids* is empty.
    """
    total = len(dataset_ids)
    # A float index cannot subscript a list and would render as "2.0" in the
    # label, so both operands are normalised to integers first.
    new_index = (int(index) + int(direction)) % total
    return (
        new_index,
        f"Dataset {new_index + 1} of {total}: {dataset_ids[new_index]}",
    )
def get_display_name(collection_id):
    """Return *collection_id* without its trailing hexadecimal identifier.

    A slug ending in ``-`` followed by 16 or more lowercase hexadecimal
    characters is cut just before that dash; any other value is returned
    unchanged.
    """
    suffix_match = re.match(r"^(.+?)-([a-f0-9]{16,})$", collection_id)
    return suffix_match.group(1) if suffix_match else collection_id
# Build the Gradio app when the module is executed or imported. The whole
# setup (collection lookup, UI construction and launch) sits inside one
# try/except, so any failure is printed instead of propagating.
try:
    # Read COLLECTION_SLUG_OR_URL and list the datasets of that collection.
    dataset_ids, collection_id = load_collection()
    # Heading text: the slug without its trailing hexadecimal identifier.
    display_name = get_display_name(collection_id)
    with gr.Blocks() as demo:
        gr.Markdown(f"<h1>Dataset Viewer for Collection: {display_name}</h1>")
        gr.Markdown(
            f"[View full collection on Hugging Face](https://huggingface.co/collections/{collection_id})"
        )
        gr.Markdown("""
This app allows you to browse and view datasets from a specific Hugging Face collection.
Use the 'Previous' and 'Next' buttons to navigate through the datasets in the collection.
See below for how to set up this app for a different collection.""")
        # Index into dataset_ids of the dataset currently shown.
        index_state = gr.State(value=0)
        with gr.Row():
            left_btn = gr.Button("Previous")
            right_btn = gr.Button("Next")
        # Label describing the current position; starts at the first dataset.
        dataset_info = gr.Markdown(f"Dataset 1 of {len(dataset_ids)}: {dataset_ids[0]}")
        # Placeholder that receives the embedded viewer iframe.
        iframe_output = gr.HTML()
        gr.Markdown("""**Note**: This space is currently set up to display datasets from a specific collection.
If you'd like to use it for a different collection:
1. Duplicate this space
2. In your duplicated space, set the `COLLECTION_SLUG_OR_URL` environment variable to your desired collection ID or URL
3. Your new space will then display datasets from your chosen collection!
Checkout the [docs](https://huggingface.co/docs/hub/datasets-viewer-embed) for other ways to use the iframe viewer.
""")
        # Both buttons call navigate_dataset; the step (-1 or +1) is supplied
        # through a hidden gr.Number input, and the dataset list through a
        # gr.State input.
        left_btn.click(
            navigate_dataset,
            inputs=[gr.State(dataset_ids), index_state, gr.Number(-1, visible=False)],
            outputs=[index_state, dataset_info],
        )
        right_btn.click(
            navigate_dataset,
            inputs=[gr.State(dataset_ids), index_state, gr.Number(1, visible=False)],
            outputs=[index_state, dataset_info],
        )
        # Whenever the stored index changes, render the viewer for that dataset.
        index_state.change(
            display_dataset,
            inputs=[gr.State(dataset_ids), index_state],
            outputs=[iframe_output],
        )
        # Initialize the display with the first dataset
        demo.load(
            fn=lambda: display_dataset(dataset_ids, 0),
            inputs=None,
            outputs=[iframe_output],
        )
    # Start the server only when run as a script, not when imported.
    if __name__ == "__main__":
        demo.launch()
except Exception as e:
    # NOTE(review): this handler catches every exception raised above, not only
    # configuration errors, yet always prints the environment-variable hint.
    print(f"Error: {str(e)}")
    print(
        "Please set the COLLECTION_SLUG_OR_URL environment variable with a valid collection ID or URL."
    )
|