Adapting committed on
Commit
4837f95
1 Parent(s): 247c4e3
Files changed (3) hide show
  1. lrt/__init__.py +2 -1
  2. lrt/utils/article.py +19 -1
  3. widgets/body.py +27 -19
lrt/__init__.py CHANGED
@@ -1,2 +1,3 @@
1
  from .lrt import LiteratureResearchTool
2
- from .clustering import Configuration
 
 
1
  from .lrt import LiteratureResearchTool
2
+ from .clustering import Configuration
3
+ from .utils import Article, ArticleList
lrt/utils/article.py CHANGED
@@ -1,4 +1,5 @@
1
  from typing import List, Union, Optional
 
2
  class Article:
3
  '''
4
  attributes:
@@ -31,6 +32,15 @@ class Article:
31
 
32
  return ret
33
 
 
 
 
 
 
 
 
 
 
34
  class ArticleList:
35
  '''
36
  list of articles
@@ -72,6 +82,12 @@ class ArticleList:
72
  def __len__(self):
73
  return len(self.__list__)
74
 
 
 
 
 
 
 
75
  @classmethod
76
  def parse_ieee_articles(cls,items: Union[dict, List[dict]]):
77
  if isinstance(items,dict):
@@ -391,4 +407,6 @@ if __name__ == '__main__':
391
  print(pwc_articles)
392
 
393
  for i in ieee_articles:
394
- print(i)
 
 
 
1
  from typing import List, Union, Optional
2
+ import pandas as pd
3
  class Article:
4
  '''
5
  attributes:
 
32
 
33
  return ret
34
 
35
+ def getDict(self) -> dict:
36
+ return {
37
+ 'title': self.title,
38
+ 'authors': self.authors,
39
+ 'abstract': self.abstract,
40
+ 'url': self.url,
41
+ 'publication_year': self.publication_year
42
+ }
43
+
44
  class ArticleList:
45
  '''
46
  list of articles
 
82
  def __len__(self):
83
  return len(self.__list__)
84
 
85
+ def getDataFrame(self) ->pd.DataFrame:
86
+ return pd.DataFrame(
87
+ [x.getDict() for x in self.__list__]
88
+ )
89
+
90
+
91
  @classmethod
92
  def parse_ieee_articles(cls,items: Union[dict, List[dict]]):
93
  if isinstance(items,dict):
 
407
  print(pwc_articles)
408
 
409
  for i in ieee_articles:
410
+ print(i)
411
+
412
+ print(pwc_articles.getDataFrame())
widgets/body.py CHANGED
@@ -1,16 +1,18 @@
1
  import streamlit as st
2
  from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
 
 
3
  from lrt_instance import *
4
- from pyecharts.charts import Bar
5
- from pyecharts import options as opts
6
- import streamlit.components.v1 as st_render
7
- from .utils import generate_html_pyecharts
8
  from .charts import build_bar_charts
9
 
10
  def __preview__(platforms, num_papers, num_papers_preview, query_input,start_year,end_year):
11
  with st.spinner('Searching...'):
12
  paperInGeneral = st.empty() # paper的大概
13
- paperInGeneral_md = '''# Query Results Preview
14
  We have found following papers for you! (displaying 5 papers for each literature platforms)
15
  '''
16
  if 'IEEE' in platforms:
@@ -67,28 +69,34 @@ def render_body(platforms, num_papers, num_papers_preview, query_input, show_pre
67
  generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k)
68
  for i,plat in enumerate(platforms):
69
  clusters, articles = next(generator)
70
- st.markdown(f'''# {plat} Results''')
71
  clusters.sort()
72
 
73
- st.markdown(f'''## Clusters Overview''')
74
- st.markdown(f'''Here we show the overview of the clusters, more specifically,''')
75
  st.markdown(f'''\n- the number of papers in each cluster\n- the number of keyphrases of each cluster''')
76
- '''
77
- plot using pyecharts
78
- bar = (
79
- Bar()
80
- .add_xaxis([f'Cluster {i + 1}' for i in range(len(clusters))])
81
- .add_yaxis("number of papers", [len(c) for c in clusters])
82
- .add_yaxis("number of keyphrases", [len(c.get_keyphrases()) for c in clusters])
83
- )
84
- html = generate_html_pyecharts(bar, 'tmp.html')
85
- st_render.html(html, height=500, width=1000)
86
- '''
87
  st.bokeh_chart(build_bar_charts(
88
  x_range=[f'Cluster {i + 1}' for i in range(len(clusters))],
89
  y_names= ['Number of Papers', 'Number of Keyphrases'],
90
  y_data=[[len(c) for c in clusters],[len(c.get_keyphrases()) for c in clusters]]
91
  ))
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
 
 
1
  import streamlit as st
2
  from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
3
+ from lrt.clustering.clusters import SingleCluster
4
+ from lrt import ArticleList
5
  from lrt_instance import *
6
+ # from pyecharts.charts import Bar
7
+ # from pyecharts import options as opts
8
+ # import streamlit.components.v1 as st_render
9
+ # from .utils import generate_html_pyecharts
10
  from .charts import build_bar_charts
11
 
12
  def __preview__(platforms, num_papers, num_papers_preview, query_input,start_year,end_year):
13
  with st.spinner('Searching...'):
14
  paperInGeneral = st.empty() # paper的大概
15
+ paperInGeneral_md = '''# 0 Query Results Preview
16
  We have found following papers for you! (displaying 5 papers for each literature platforms)
17
  '''
18
  if 'IEEE' in platforms:
 
69
  generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k)
70
  for i,plat in enumerate(platforms):
71
  clusters, articles = next(generator)
72
+ st.markdown(f'''# {i+1} {plat} Results''')
73
  clusters.sort()
74
 
75
+ st.markdown(f'''## {i+1}.1 Clusters Overview''')
76
+ st.markdown(f'''In this section we show the overview of the clusters, more specifically,''')
77
  st.markdown(f'''\n- the number of papers in each cluster\n- the number of keyphrases of each cluster''')
 
 
 
 
 
 
 
 
 
 
 
78
  st.bokeh_chart(build_bar_charts(
79
  x_range=[f'Cluster {i + 1}' for i in range(len(clusters))],
80
  y_names= ['Number of Papers', 'Number of Keyphrases'],
81
  y_data=[[len(c) for c in clusters],[len(c.get_keyphrases()) for c in clusters]]
82
  ))
83
 
84
+ st.markdown(f'''## {i+1}.2 Cluster Details''')
85
+ st.markdown(f'''In this section we show the details of each cluster, including''')
86
+ st.markdown(f'''\n- the article information in the cluster\n- the keyphrases of the cluster''')
87
+ for j,cluster in enumerate(clusters):
88
+ assert isinstance(cluster,SingleCluster) #TODO: remove this line
89
+ ids = cluster.elements()
90
+ articles_in_cluster = ArticleList([articles[id] for id in ids])
91
+ st.markdown(f'''**Cluster {j + 1}**''')
92
+ st.dataframe(articles_in_cluster.getDataFrame())
93
+ st.markdown(f'''The top 5 keyphrases of this cluster are:''')
94
+ md = ''
95
+ for keyphrase in cluster.top_5_keyphrases:
96
+ md += f'''- `{keyphrase}`\n'''
97
+ st.markdown(md)
98
+
99
+
100
+
101
 
102