Spaces:
Sleeping
Sleeping
import gradio as gr | |
import re | |
from collections import defaultdict | |
# Load and preprocess the data
# The loading is kept outside a function so the CSV is read only once, at import time.
data = []  # one {"legal_issue_area": ..., "url": ...} dict per CSV row
categories = defaultdict(set)  # lower-cased word -> legal issue areas containing that word

# Explicit encoding so parsing does not depend on the platform's locale default.
with open("Legal issues.csv", "r", encoding="utf-8") as file:
    # Skip the header line; the default keeps an empty file from raising StopIteration.
    next(file, None)
    for line in file:
        # Rows are "<url>,<legal issue area>". Split on the first comma only so
        # commas inside the legal issue area are preserved.
        url, separator, legal_issue_area = line.strip().partition(",")
        if not separator:
            # Blank or malformed line (no comma): skip it instead of crashing.
            continue
        url = url.strip()
        legal_issue_area = legal_issue_area.strip()
        data.append({"legal_issue_area": legal_issue_area, "url": url})

        # Categorization and Tagging: file the area under each of its words.
        for word in re.findall(r'\w+', legal_issue_area.lower()):
            categories[word].add(legal_issue_area)
# Indexing
# Inverted index: each lower-cased word maps to the (legal issue area, url)
# pair of every loaded entry whose legal issue area contains that word.
index = defaultdict(list)
for record in data:
    pair = (record["legal_issue_area"], record["url"])
    for token in re.findall(r'\w+', pair[0].lower()):
        index[token].append(pair)
def search(query):
    """Return a formatted, relevance-ranked list of legal issues matching *query*.

    The query is lower-cased and split into word tokens. Every entry filed in
    the module-level ``index`` under at least one token is a candidate.
    Candidates are scored by the number of distinct query tokens that occur
    as whole words in their legal issue area.

    Args:
        query: Free-text search string.

    Returns:
        One ``"- <legal issue area> (<words>): <url>"`` line per match, joined
        with newlines, ordered by descending score and then alphabetically.
        An empty string when nothing matches.
    """
    # A set, so a word repeated in the query is not counted twice.
    query_words = set(re.findall(r'\w+', query.lower()))

    # Collect candidates in a set so an entry reached through several query
    # words is listed only once.
    candidates = set()
    for word in query_words:
        candidates.update(index.get(word, ()))

    ranked_results = []
    for legal_issue_area, url in candidates:
        area_words = re.findall(r'\w+', legal_issue_area.lower())
        # Score on whole words, matching how the index was built; a substring
        # test would let "art" count towards "Martial law".
        score = len(query_words.intersection(area_words))
        category = ", ".join(word for word in area_words if word in categories)
        ranked_results.append((score, legal_issue_area, url, category))

    # Best score first; ties are broken alphabetically so the order is stable.
    ranked_results.sort(key=lambda item: (-item[0], item[1], item[2]))

    return "\n".join(
        f"- {legal_issue_area} ({category}): {url}"
        for _score, legal_issue_area, url, category in ranked_results
    )
def search_interface(query):
    """Gradio callback: validate the query, run the search, return display text.

    Args:
        query: Text typed into the search box.

    Returns:
        A prompt when the query is empty or whitespace-only, the formatted
        results from ``search`` when there are matches, and a
        "No results found." message otherwise.
    """
    # A whitespace-only query has no searchable words, so treat it as empty.
    if not query or not query.strip():
        return "Please enter a query."
    # search() returns "" when nothing matches; show a message rather than
    # leaving the output box blank.
    return search(query) or "No results found."
# Gradio UI: a two-line query box wired to search_interface, with a 20-line
# text box that shows the formatted results.
iface = gr.Interface(
    title="Legal Issues Search",
    description="Enter a query to search for legal issues and their respective URLs.",
    fn=search_interface,
    inputs=gr.Textbox(placeholder="Enter your search query here...", lines=2),
    outputs=gr.Textbox(lines=20),
)

# Launch the app.
iface.launch()