File size: 2,232 Bytes
90bee4f
 
 
 
 
 
 
 
 
cb1eab8
90bee4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
import re
from collections import defaultdict

# Load and preprocess the data.
#
# This runs at module level so the CSV is read and indexed exactly once, when
# the script starts. Each data row has the form "<url>,<legal issue area>";
# only the FIRST comma separates the two columns, so the issue area itself may
# contain commas.
data = []                      # one {"legal_issue_area", "url"} dict per row, in file order
categories = defaultdict(set)  # lower-cased word -> set of issue-area titles containing it
index = defaultdict(list)      # lower-cased word -> list of (issue-area title, url) pairs

with open("Legal issues.csv", "r") as file:
    # Skip the header line; the default keeps an empty file from raising StopIteration.
    next(file, None)
    for line_number, line in enumerate(file, start=2):
        line = line.strip()
        if not line:
            # Blank lines (typically a trailing newline at EOF) carry no record.
            continue

        url, separator, legal_issue_area = line.partition(",")
        if not separator:
            # Fail loudly, but say where, instead of an opaque unpacking error.
            raise ValueError(
                f"Legal issues.csv line {line_number}: "
                f"expected '<url>,<legal issue area>', got {line!r}"
            )

        url = url.strip()
        legal_issue_area = legal_issue_area.strip()
        data.append({"legal_issue_area": legal_issue_area, "url": url})

        # Categorization, tagging and indexing all key on the same tokens, so
        # the title is tokenized once and both lookup tables are filled together.
        for word in re.findall(r'\w+', legal_issue_area.lower()):
            categories[word].add(legal_issue_area)
            index[word].append((legal_issue_area, url))

def search(query):
    """Return a formatted, ranked list of legal issues matching *query*.

    The query is lower-cased and split into ``\\w+`` tokens. Every entry of the
    module-level ``index`` (word -> list of ``(legal_issue_area, url)`` pairs)
    that shares at least one whole word with the query is a hit.

    Hits are ranked by the number of distinct query words that occur as whole
    words in the issue-area title (highest first). Ties are broken
    alphabetically by title, then by URL, so the output is deterministic.

    Args:
        query: Free-text search string.

    Returns:
        One line per hit in the form
        ``- <legal issue area> (<word>, <word>, ...): <url>``, joined with
        newlines. The parenthesised words are the title's tokens that are keys
        of the module-level ``categories`` mapping. An empty string is
        returned when nothing matches.
    """
    # A set both de-duplicates repeated query words (so "law law" does not
    # count twice) and makes the overlap computation below a plain intersection.
    query_vocab = set(re.findall(r'\w+', query.lower()))

    # Collect hits into a set: the same entry is usually reachable through
    # several of its words and must only be reported once.
    hits = set()
    for word in query_vocab:
        hits.update(index.get(word, ()))

    scored = []
    for legal_issue_area, url in hits:
        title_words = re.findall(r'\w+', legal_issue_area.lower())
        # Whole-word overlap, consistent with how the index was consulted;
        # substring tests would let a query word like "a" match every title.
        score = len(query_vocab.intersection(title_words))
        scored.append((score, legal_issue_area, url, title_words))

    # Best score first; alphabetical (case-insensitive) title, then URL on ties.
    scored.sort(key=lambda hit: (-hit[0], hit[1].lower(), hit[1], hit[2]))

    formatted_results = []
    for _score, legal_issue_area, url, title_words in scored:
        category = ", ".join(word for word in title_words if word in categories)
        formatted_results.append(f"- {legal_issue_area} ({category}): {url}")

    return "\n".join(formatted_results)

def search_interface(query):
    """Validate the user's query and return displayable search results.

    Args:
        query: Text taken from the input box; may be ``None``, empty, or
            whitespace only.

    Returns:
        A prompt asking for a query when none was supplied, the formatted
        result list from ``search`` when there are hits, or a short
        "no results" notice when the search comes back empty.
    """
    # Whitespace-only input contains no searchable words, so treat it as empty.
    if not query or not query.strip():
        return "Please enter a query."

    results = search(query)
    # An empty string would leave the output box blank with no explanation.
    return results or "No results found."

# Build the input and output text widgets first, then wire them to the
# search callback.
query_box = gr.Textbox(lines=2, placeholder="Enter your search query here...")
results_box = gr.Textbox(lines=20)

iface = gr.Interface(
    fn=search_interface,
    inputs=query_box,
    outputs=results_box,
    title="Legal Issues Search",
    description="Enter a query to search for legal issues and their respective URLs.",
)

# Launch the interface when this script is executed.
iface.launch()