Spaces:
Running
Running
update
Browse files
- lrt/__init__.py +2 -1
- lrt/utils/article.py +19 -1
- widgets/body.py +27 -19
lrt/__init__.py
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
from .lrt import LiteratureResearchTool
|
2 |
-
from .clustering import Configuration
|
|
|
|
1 |
from .lrt import LiteratureResearchTool
|
2 |
+
from .clustering import Configuration
|
3 |
+
from .utils import Article, ArticleList
|
lrt/utils/article.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from typing import List, Union, Optional
|
|
|
2 |
class Article:
|
3 |
'''
|
4 |
attributes:
|
@@ -31,6 +32,15 @@ class Article:
|
|
31 |
|
32 |
return ret
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
class ArticleList:
|
35 |
'''
|
36 |
list of articles
|
@@ -72,6 +82,12 @@ class ArticleList:
|
|
72 |
def __len__(self):
|
73 |
return len(self.__list__)
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
@classmethod
|
76 |
def parse_ieee_articles(cls,items: Union[dict, List[dict]]):
|
77 |
if isinstance(items,dict):
|
@@ -391,4 +407,6 @@ if __name__ == '__main__':
|
|
391 |
print(pwc_articles)
|
392 |
|
393 |
for i in ieee_articles:
|
394 |
-
print(i)
|
|
|
|
|
|
1 |
from typing import List, Union, Optional
|
2 |
+
import pandas as pd
|
3 |
class Article:
|
4 |
'''
|
5 |
attributes:
|
|
|
32 |
|
33 |
return ret
|
34 |
|
35 |
+
def getDict(self) -> dict:
|
36 |
+
return {
|
37 |
+
'title': self.title,
|
38 |
+
'authors': self.authors,
|
39 |
+
'abstract': self.abstract,
|
40 |
+
'url': self.url,
|
41 |
+
'publication_year': self.publication_year
|
42 |
+
}
|
43 |
+
|
44 |
class ArticleList:
|
45 |
'''
|
46 |
list of articles
|
|
|
82 |
def __len__(self):
|
83 |
return len(self.__list__)
|
84 |
|
85 |
+
def getDataFrame(self) ->pd.DataFrame:
|
86 |
+
return pd.DataFrame(
|
87 |
+
[x.getDict() for x in self.__list__]
|
88 |
+
)
|
89 |
+
|
90 |
+
|
91 |
@classmethod
|
92 |
def parse_ieee_articles(cls,items: Union[dict, List[dict]]):
|
93 |
if isinstance(items,dict):
|
|
|
407 |
print(pwc_articles)
|
408 |
|
409 |
for i in ieee_articles:
|
410 |
+
print(i)
|
411 |
+
|
412 |
+
print(pwc_articles.getDataFrame())
|
widgets/body.py
CHANGED
@@ -1,16 +1,18 @@
|
|
1 |
import streamlit as st
|
2 |
from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
|
|
|
|
|
3 |
from lrt_instance import *
|
4 |
-
from pyecharts.charts import Bar
|
5 |
-
from pyecharts import options as opts
|
6 |
-
import streamlit.components.v1 as st_render
|
7 |
-
from .utils import generate_html_pyecharts
|
8 |
from .charts import build_bar_charts
|
9 |
|
10 |
def __preview__(platforms, num_papers, num_papers_preview, query_input,start_year,end_year):
|
11 |
with st.spinner('Searching...'):
|
12 |
paperInGeneral = st.empty() # paper的大概
|
13 |
-
paperInGeneral_md = '''# Query Results Preview
|
14 |
We have found following papers for you! (displaying 5 papers for each literature platforms)
|
15 |
'''
|
16 |
if 'IEEE' in platforms:
|
@@ -67,28 +69,34 @@ def render_body(platforms, num_papers, num_papers_preview, query_input, show_pre
|
|
67 |
generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k)
|
68 |
for i,plat in enumerate(platforms):
|
69 |
clusters, articles = next(generator)
|
70 |
-
st.markdown(f'''# {plat} Results''')
|
71 |
clusters.sort()
|
72 |
|
73 |
-
st.markdown(f'''## Clusters Overview''')
|
74 |
-
st.markdown(f'''
|
75 |
st.markdown(f'''\n- the number of papers in each cluster\n- the number of keyphrases of each cluster''')
|
76 |
-
'''
|
77 |
-
plot using pyecharts
|
78 |
-
bar = (
|
79 |
-
Bar()
|
80 |
-
.add_xaxis([f'Cluster {i + 1}' for i in range(len(clusters))])
|
81 |
-
.add_yaxis("number of papers", [len(c) for c in clusters])
|
82 |
-
.add_yaxis("number of keyphrases", [len(c.get_keyphrases()) for c in clusters])
|
83 |
-
)
|
84 |
-
html = generate_html_pyecharts(bar, 'tmp.html')
|
85 |
-
st_render.html(html, height=500, width=1000)
|
86 |
-
'''
|
87 |
st.bokeh_chart(build_bar_charts(
|
88 |
x_range=[f'Cluster {i + 1}' for i in range(len(clusters))],
|
89 |
y_names= ['Number of Papers', 'Number of Keyphrases'],
|
90 |
y_data=[[len(c) for c in clusters],[len(c.get_keyphrases()) for c in clusters]]
|
91 |
))
|
92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
|
|
|
1 |
import streamlit as st
|
2 |
from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
|
3 |
+
from lrt.clustering.clusters import SingleCluster
|
4 |
+
from lrt import ArticleList
|
5 |
from lrt_instance import *
|
6 |
+
# from pyecharts.charts import Bar
|
7 |
+
# from pyecharts import options as opts
|
8 |
+
# import streamlit.components.v1 as st_render
|
9 |
+
# from .utils import generate_html_pyecharts
|
10 |
from .charts import build_bar_charts
|
11 |
|
12 |
def __preview__(platforms, num_papers, num_papers_preview, query_input,start_year,end_year):
|
13 |
with st.spinner('Searching...'):
|
14 |
paperInGeneral = st.empty() # paper的大概
|
15 |
+
paperInGeneral_md = '''# 0 Query Results Preview
|
16 |
We have found following papers for you! (displaying 5 papers for each literature platforms)
|
17 |
'''
|
18 |
if 'IEEE' in platforms:
|
|
|
69 |
generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k)
|
70 |
for i,plat in enumerate(platforms):
|
71 |
clusters, articles = next(generator)
|
72 |
+
st.markdown(f'''# {i+1} {plat} Results''')
|
73 |
clusters.sort()
|
74 |
|
75 |
+
st.markdown(f'''## {i+1}.1 Clusters Overview''')
|
76 |
+
st.markdown(f'''In this section we show the overview of the clusters, more specifically,''')
|
77 |
st.markdown(f'''\n- the number of papers in each cluster\n- the number of keyphrases of each cluster''')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
st.bokeh_chart(build_bar_charts(
|
79 |
x_range=[f'Cluster {i + 1}' for i in range(len(clusters))],
|
80 |
y_names= ['Number of Papers', 'Number of Keyphrases'],
|
81 |
y_data=[[len(c) for c in clusters],[len(c.get_keyphrases()) for c in clusters]]
|
82 |
))
|
83 |
|
84 |
+
st.markdown(f'''## {i+1}.2 Cluster Details''')
|
85 |
+
st.markdown(f'''In this section we show the details of each cluster, including''')
|
86 |
+
st.markdown(f'''\n- the article information in the cluster\n- the keyphrases of the cluster''')
|
87 |
+
for j,cluster in enumerate(clusters):
|
88 |
+
assert isinstance(cluster,SingleCluster) #TODO: remove this line
|
89 |
+
ids = cluster.elements()
|
90 |
+
articles_in_cluster = ArticleList([articles[id] for id in ids])
|
91 |
+
st.markdown(f'''**Cluster {j + 1}**''')
|
92 |
+
st.dataframe(articles_in_cluster.getDataFrame())
|
93 |
+
st.markdown(f'''The top 5 keyphrases of this cluster are:''')
|
94 |
+
md = ''
|
95 |
+
for keyphrase in cluster.top_5_keyphrases:
|
96 |
+
md += f'''- `{keyphrase}`\n'''
|
97 |
+
st.markdown(md)
|
98 |
+
|
99 |
+
|
100 |
+
|
101 |
|
102 |
|