import gradio as gr
import os

# PERSISTENT DATA STORAGE: these are used to upload user responses to a dataset
import json
from datetime import datetime
from pathlib import Path
from uuid import uuid4
from huggingface_hub import CommitScheduler
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"

scheduler = CommitScheduler(
    repo_id="ebrowne/test-data",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="data",
    token=os.getenv("HF_TOKEN"),
)
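
# Note: CommitScheduler pushes the contents of folder_path to the dataset repo
# in a background thread on a fixed interval (the `every` parameter, which
# defaults to 5 minutes), so responses are persisted without blocking the UI.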

def save_json(score1, score2):
    # Append one response per line (JSON Lines); holding the scheduler's lock
    # avoids writing to the file while a commit is in progress.
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a") as f:
            json.dump({"relevance": score1, "novelty": score2, "datetime": datetime.now().isoformat()}, f)
            f.write("\n")
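
# For reference, a minimal sketch (not part of the app) of loading the saved
# responses back; the file is JSON Lines, which the datasets library reads directly:
#   from datasets import load_dataset
#   responses = load_dataset("json", data_files=str(JSON_DATASET_PATH))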

# READING EXISTING DATA: this is used to read questions
from datasets import load_dataset

qa_data = load_dataset("ebrowne/test-data", data_files="test.json")
loaded_text = qa_data["train"]["example_string"][0]
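# Note: with a single data_files entry, load_dataset puts everything in the
# "train" split; indexing by column name returns the whole column as a list,
# so [0] above selects the first example's "example_string" value.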

# VARIABLES: will eventually be loaded with JSON from a dataset
question_text = """
### Bar Question
What is the answer to this question?"""
answers_text = ["A", "B", "C", "D"]

# BLOCKS: main user interface
with gr.Blocks() as user_eval:
    # Title text introducing the study
    gr.Markdown("""
    # Legal Retriever Evaluation Study
    Thank you for your participation! Here are some basic instructions on how to complete the legal study.
    """)
    # Passages and user evaluations thereof
    with gr.Row(equal_height=False, visible=False) as evals:
        # Passage text
        with gr.Column(scale=2) as passages:
            passage_display = gr.Markdown("""
            ### Question
            """ + loaded_text + """
            ### Relevant Passages
            - Dataset 1
            - Dataset 2
            - More text
            - More text
            - More text
            - More text
            ### Auto-Generated Summary
            This is a summary of the above legal passages, which imitates how a RAG system might \
            incorporate retrieved data into its context to give a better response to a certain query.
            """)
        # Scoring box
        with gr.Column(scale=1) as scores:
            desc_1 = gr.Markdown("How **relevant** are these passages to our query?")
            eval_1 = gr.Slider(1, 5, step=0.5)
            desc_2 = gr.Markdown("How **novel** are these passages compared to the previous passages?")
            eval_2 = gr.Slider(1, 5, step=0.5)
            btn = gr.Button("Next")
            # Save the slider scores to the dataset when the user clicks "Next"
            btn.click(fn=save_json, inputs=[eval_1, eval_2])
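            # Note: Gradio passes the current values of the components listed in
            # `inputs` as positional arguments to `fn`, so save_json receives the
            # two slider scores directly.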
    # Question and answering dynamics
    with gr.Row() as question:
        with gr.Column():
            gr.Markdown(question_text)
            a = gr.Button(answers_text[0])
            b = gr.Button(answers_text[1])
            c = gr.Button(answers_text[2])
            d = gr.Button(answers_text[3])
            # Hide the question row and reveal the evaluation row
            def answer():
                return {
                    question: gr.Row(visible=False),
                    evals: gr.Row(visible=True),
                }
            a.click(fn=answer, outputs=[question, evals])
            b.click(fn=answer, outputs=[question, evals])
            c.click(fn=answer, outputs=[question, evals])
            d.click(fn=answer, outputs=[question, evals])
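            # Note: returning a dict keyed by output components updates only those
            # components; in recent Gradio versions, gr.Row(visible=...) in a return
            # value acts as a visibility update to the corresponding row.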

# Starts on the question and switches to the evaluation view after the user answers
user_eval.launch()