"""Gradio app offering keyword search over a CSV of legal issue areas and URLs."""

import csv
import re
from collections import defaultdict

import gradio as gr

# CSV file read at import time: one header row, then "<url>,<legal issue area>".
DATA_FILE = "Legal issues.csv"

# Compiled once; used for tokenising both the indexed titles and the queries.
_WORD_RE = re.compile(r"\w+")


def _tokenize(text):
    """Return the lowercase word tokens of *text*, in order of appearance."""
    return _WORD_RE.findall(text.lower())


def _load_data(path):
    """Read the CSV at *path* and return ``(entries, word_to_areas)``.

    ``entries`` is a list of ``{"legal_issue_area": ..., "url": ...}`` dicts,
    one per data row.  ``word_to_areas`` maps every word that occurs in a
    legal issue area to the set of areas containing it.

    The first row is treated as a header and skipped.  Rows with fewer than
    two fields (including blank lines) are ignored rather than aborting the
    load.

    Raises:
        OSError: if the file cannot be opened.
    """
    entries = []
    word_to_areas = defaultdict(set)

    # NOTE(review): the encoding is left at the platform default, as before;
    # confirm the file's real encoding and pass it explicitly if it is known.
    with open(path, "r", newline="") as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header; tolerate a completely empty file.
        for row in reader:
            if len(row) < 2:
                continue
            url = row[0].strip()
            # Everything after the first column is the issue area, so an
            # unquoted title that itself contains commas is kept intact.
            legal_issue_area = ",".join(row[1:]).strip()
            entries.append({"legal_issue_area": legal_issue_area, "url": url})
            for word in _tokenize(legal_issue_area):
                word_to_areas[word].add(legal_issue_area)

    return entries, word_to_areas


def _build_index(entries):
    """Return an inverted index mapping word -> list of ``(area, url)`` pairs."""
    word_index = defaultdict(list)
    for entry in entries:
        hit = (entry["legal_issue_area"], entry["url"])
        for word in _tokenize(entry["legal_issue_area"]):
            word_index[word].append(hit)
    return word_index


# Loaded once at import time so that every search reuses the same structures.
data, categories = _load_data(DATA_FILE)
index = _build_index(data)


def search(query: str) -> str:
    """Return the entries matching *query*, formatted one per line.

    An entry matches when at least one query word equals one of the words of
    its legal issue area (case-insensitive).  Matches are ordered by the number
    of distinct query words they contain, highest first; ties are broken
    alphabetically by area and then by URL.

    Each line has the form ``- <area> (<comma-separated words>): <url>``.
    Returns an empty string when nothing matches.
    """
    query_words = set(_tokenize(query))

    # A set removes entries reached through more than one query word.
    # ``.get`` avoids inserting empty lists into the defaultdict.
    matches = set()
    for word in query_words:
        matches.update(index.get(word, ()))

    ranked = []
    for legal_issue_area, url in matches:
        area_words = _tokenize(legal_issue_area)
        # Score on whole words, consistent with how matches were retrieved.
        score = len(query_words.intersection(area_words))
        ranked.append((score, legal_issue_area, url, area_words))
    ranked.sort(key=lambda item: (-item[0], item[1], item[2]))

    formatted_results = []
    for _score, legal_issue_area, url, area_words in ranked:
        category = ", ".join(word for word in area_words if word in categories)
        formatted_results.append(f"- {legal_issue_area} ({category}): {url}")
    return "\n".join(formatted_results)


def search_interface(query: str) -> str:
    """Gradio callback: validate *query* and return the text to display."""
    if not query or not query.strip():
        return "Please enter a query."
    # Never leave the output box blank when the query matched nothing.
    return search(query) or "No results found."


iface = gr.Interface(
    fn=search_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your search query here..."),
    outputs=gr.Textbox(lines=20),
    title="Legal Issues Search",
    description="Enter a query to search for legal issues and their respective URLs.",
)

if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    iface.launch()