layperson99 committed on
Commit
90bee4f
1 Parent(s): 9b71539

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ from collections import defaultdict
4
+
5
# Load and preprocess the data.
# The loading is kept at module level (outside any function) so the CSV is
# read once, when the module is first executed.
data = []  # one {"legal_issue_area": ..., "url": ...} dict per usable CSV row
categories = defaultdict(set)  # lowercase word -> issue areas containing it

# Explicit encoding so decoding does not depend on the platform default.
# NOTE(review): assumes the CSV is UTF-8 encoded -- confirm against the file.
with open("Legal Issues Sheet1.csv", "r", encoding="utf-8") as file:
    next(file, None)  # Skip the header line; the default tolerates an empty file
    for line in file:
        # A row is "<url>,<legal issue area>". Only the first comma separates
        # the two fields, so the issue area may itself contain commas.
        url, separator, legal_issue_area = line.strip().partition(",")
        if not separator:
            # Blank line (e.g. a trailing newline at the end of the file) or a
            # row without a comma: nothing to index, so skip it instead of
            # failing on tuple unpacking.
            continue
        url = url.strip()
        legal_issue_area = legal_issue_area.strip()
        data.append({"legal_issue_area": legal_issue_area, "url": url})

        # Categorization and Tagging: every word of an issue area becomes a
        # tag that points back at the areas it appears in.
        for word in re.findall(r'\w+', legal_issue_area.lower()):
            categories[word].add(legal_issue_area)

# Indexing: map each lowercase word to the (legal_issue_area, url) pairs whose
# issue area contains that word.
index = defaultdict(list)
for entry in data:
    legal_issue_area = entry["legal_issue_area"]
    url = entry["url"]
    for word in re.findall(r'\w+', legal_issue_area.lower()):
        index[word].append((legal_issue_area, url))
29
+
30
def search(query):
    """Return formatted search results for *query*, most relevant first.

    The query is lowercased and split into word tokens. Every entry stored in
    the module-level ``index`` under at least one of those tokens is a match.
    Matches are ranked by how many distinct query tokens occur as whole words
    in the legal issue area; ties are ordered alphabetically by issue area and
    then by URL.

    Args:
        query: Free-text search string.

    Returns:
        One line per match in the form ``- <area> (<words>): <url>``, joined
        with newlines, or an empty string when nothing matches.
    """
    # Distinct query tokens, so a repeated word is not counted twice.
    query_words = set(re.findall(r'\w+', query.lower()))

    # Gather candidates through the index; the set removes entries that were
    # reached through more than one query word. ``.get`` avoids inserting
    # empty lists into the defaultdict for unknown words.
    matches = set()
    for word in query_words:
        matches.update(index.get(word, ()))

    # Ranking results based on relevance. Scoring compares whole-word tokens,
    # the same tokenisation used to build the index, so a short query word
    # cannot score by merely being a substring of a longer word.
    ranked_results = []
    for legal_issue_area, url in matches:
        area_words = set(re.findall(r'\w+', legal_issue_area.lower()))
        score = len(query_words & area_words)
        ranked_results.append((score, legal_issue_area, url))
    # Highest score first; equal scores fall back to alphabetical order.
    ranked_results.sort(key=lambda item: (-item[0], item[1], item[2]))

    formatted_results = []
    for _score, legal_issue_area, url in ranked_results:
        category_words = [
            word
            for word in re.findall(r'\w+', legal_issue_area.lower())
            if word in categories
        ]
        category = ", ".join(category_words)
        formatted_results.append(f"- {legal_issue_area} ({category}): {url}")

    return "\n".join(formatted_results)
53
+
54
def search_interface(query):
    """Validate *query* and return the text to show for it.

    Args:
        query: Text entered by the user; may be empty or ``None``.

    Returns:
        A prompt when the query is empty or only whitespace, a "no results"
        notice when the search finds nothing, otherwise the result of
        ``search(query)``.
    """
    if not query or not query.strip():
        return "Please enter a query."
    # An empty search result would otherwise render as a blank output box,
    # which gives the user no feedback that the search actually ran.
    return search(query) or "No results found."
58
+
59
# Build the input and output widgets first, then wire them to the search
# callback in a single Interface and start serving it.
query_input = gr.Textbox(lines=2, placeholder="Enter your search query here...")
results_output = gr.Textbox(lines=20)

iface = gr.Interface(
    fn=search_interface,
    inputs=query_input,
    outputs=results_output,
    title="Legal Issues Search",
    description="Enter a query to search for legal issues and their respective URLs.",
)

iface.launch()