Spaces:
Running
Running
debug
Browse files- .gitignore +2 -1
- lrt/lrt.py +43 -11
- widgets/body.py +3 -2
- widgets/sidebar.py +2 -2
.gitignore
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
venv
|
2 |
test.py
|
3 |
.config.json
|
4 |
-
__pycache__
|
|
|
|
1 |
venv
|
2 |
test.py
|
3 |
.config.json
|
4 |
+
__pycache__
|
5 |
+
.idea
|
lrt/lrt.py
CHANGED
@@ -3,6 +3,9 @@ from typing import List
|
|
3 |
import textdistance as td
|
4 |
from .utils import UnionFind, ArticleList
|
5 |
from .academic_query import AcademicQuery
|
|
|
|
|
|
|
6 |
|
7 |
class LiteratureResearchTool:
|
8 |
def __init__(self, cluster_config: Configuration = None):
|
@@ -45,7 +48,6 @@ class LiteratureResearchTool:
|
|
45 |
platforms: List[str] = ['IEEE', 'Arxiv', 'Paper with Code'],
|
46 |
best_k: int = 5,
|
47 |
loading_ctx_manager = None,
|
48 |
-
decorator: callable = None
|
49 |
):
|
50 |
|
51 |
|
@@ -68,27 +70,57 @@ class LiteratureResearchTool:
|
|
68 |
end_year: int,
|
69 |
best_k: int = 5
|
70 |
) -> (ClusterList,ArticleList):
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
clusters = self.cluster_pipeline(abstracts, best_k=best_k)
|
75 |
clusters = self.__postprocess_clusters__(clusters)
|
76 |
-
return clusters,articles
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
articles = ArticleList.parse_arxiv_articles(
|
79 |
-
|
80 |
abstracts = articles.getAbstracts() # List[str]
|
81 |
-
clusters = self.cluster_pipeline(abstracts,best_k=best_k)
|
82 |
clusters = self.__postprocess_clusters__(clusters)
|
83 |
return clusters, articles
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
articles = ArticleList.parse_pwc_articles(
|
86 |
-
|
87 |
abstracts = articles.getAbstracts() # List[str]
|
88 |
-
clusters = self.cluster_pipeline(abstracts,best_k=best_k)
|
89 |
clusters = self.__postprocess_clusters__(clusters)
|
90 |
return clusters, articles
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
|
94 |
|
|
|
3 |
import textdistance as td
|
4 |
from .utils import UnionFind, ArticleList
|
5 |
from .academic_query import AcademicQuery
|
6 |
+
import streamlit as st
|
7 |
+
from tokenizers import Tokenizer
|
8 |
+
|
9 |
|
10 |
class LiteratureResearchTool:
|
11 |
def __init__(self, cluster_config: Configuration = None):
|
|
|
48 |
platforms: List[str] = ['IEEE', 'Arxiv', 'Paper with Code'],
|
49 |
best_k: int = 5,
|
50 |
loading_ctx_manager = None,
|
|
|
51 |
):
|
52 |
|
53 |
|
|
|
70 |
end_year: int,
|
71 |
best_k: int = 5
|
72 |
) -> (ClusterList,ArticleList):
|
73 |
+
|
74 |
+
@st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
|
75 |
+
def ieee_process(
|
76 |
+
query: str,
|
77 |
+
num_papers: int,
|
78 |
+
start_year: int,
|
79 |
+
end_year: int,
|
80 |
+
best_k: int = 5
|
81 |
+
):
|
82 |
+
articles = ArticleList.parse_ieee_articles(
|
83 |
+
self.literature_search.ieee(query, start_year, end_year, num_papers)) # ArticleList
|
84 |
+
abstracts = articles.getAbstracts() # List[str]
|
85 |
clusters = self.cluster_pipeline(abstracts, best_k=best_k)
|
86 |
clusters = self.__postprocess_clusters__(clusters)
|
87 |
+
return clusters, articles
|
88 |
+
|
89 |
+
@st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
|
90 |
+
def arxiv_process(
|
91 |
+
query: str,
|
92 |
+
num_papers: int,
|
93 |
+
best_k: int = 5
|
94 |
+
):
|
95 |
articles = ArticleList.parse_arxiv_articles(
|
96 |
+
self.literature_search.arxiv(query, num_papers)) # ArticleList
|
97 |
abstracts = articles.getAbstracts() # List[str]
|
98 |
+
clusters = self.cluster_pipeline(abstracts, best_k=best_k)
|
99 |
clusters = self.__postprocess_clusters__(clusters)
|
100 |
return clusters, articles
|
101 |
+
|
102 |
+
@st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
|
103 |
+
def pwc_process(
|
104 |
+
query: str,
|
105 |
+
num_papers: int,
|
106 |
+
best_k: int = 5
|
107 |
+
):
|
108 |
articles = ArticleList.parse_pwc_articles(
|
109 |
+
self.literature_search.paper_with_code(query, num_papers)) # ArticleList
|
110 |
abstracts = articles.getAbstracts() # List[str]
|
111 |
+
clusters = self.cluster_pipeline(abstracts, best_k=best_k)
|
112 |
clusters = self.__postprocess_clusters__(clusters)
|
113 |
return clusters, articles
|
114 |
|
115 |
+
if platforn_name == 'IEEE':
|
116 |
+
return ieee_process(query,num_papers,start_year,end_year,best_k)
|
117 |
+
elif platforn_name == 'Arxiv':
|
118 |
+
return arxiv_process(query,num_papers,best_k)
|
119 |
+
elif platforn_name == 'Paper with Code':
|
120 |
+
return pwc_process(query,num_papers,best_k)
|
121 |
+
else:
|
122 |
+
raise RuntimeError('This platform is not supported. Please open an issue on the GitHub.')
|
123 |
+
|
124 |
|
125 |
|
126 |
|
widgets/body.py
CHANGED
@@ -60,8 +60,9 @@ def render_body(platforms, num_papers, num_papers_preview, query_input, show_pre
|
|
60 |
|
61 |
|
62 |
# lrt results
|
63 |
-
generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k,
|
64 |
-
|
|
|
65 |
for plat in platforms:
|
66 |
clusters, articles = next(generator)
|
67 |
print(clusters)
|
|
|
60 |
|
61 |
|
62 |
# lrt results
|
63 |
+
generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k,
|
64 |
+
# loading_ctx_manager= st.spinner,
|
65 |
+
)
|
66 |
for plat in platforms:
|
67 |
clusters, articles = next(generator)
|
68 |
print(clusters)
|
widgets/sidebar.py
CHANGED
@@ -38,13 +38,13 @@ def render_sidebar():
|
|
38 |
'IEEE',
|
39 |
# 'Google Scholar',
|
40 |
'Arxiv',
|
41 |
-
'
|
42 |
], default=[
|
43 |
# 'Elvsier',
|
44 |
'IEEE',
|
45 |
# 'Google Scholar',
|
46 |
'Arxiv',
|
47 |
-
'
|
48 |
])
|
49 |
|
50 |
|
|
|
38 |
'IEEE',
|
39 |
# 'Google Scholar',
|
40 |
'Arxiv',
|
41 |
+
'Paper with Code'
|
42 |
], default=[
|
43 |
# 'Elvsier',
|
44 |
'IEEE',
|
45 |
# 'Google Scholar',
|
46 |
'Arxiv',
|
47 |
+
'Paper with Code'
|
48 |
])
|
49 |
|
50 |
|