Adapting committed on
Commit
79b1dce
1 Parent(s): 469542a
Files changed (4) hide show
  1. .gitignore +2 -1
  2. lrt/lrt.py +43 -11
  3. widgets/body.py +3 -2
  4. widgets/sidebar.py +2 -2
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  venv
2
  test.py
3
  .config.json
4
- __pycache__
 
 
1
  venv
2
  test.py
3
  .config.json
4
+ __pycache__
5
+ .idea
lrt/lrt.py CHANGED
@@ -3,6 +3,9 @@ from typing import List
3
  import textdistance as td
4
  from .utils import UnionFind, ArticleList
5
  from .academic_query import AcademicQuery
 
 
 
6
 
7
  class LiteratureResearchTool:
8
  def __init__(self, cluster_config: Configuration = None):
@@ -45,7 +48,6 @@ class LiteratureResearchTool:
45
  platforms: List[str] = ['IEEE', 'Arxiv', 'Paper with Code'],
46
  best_k: int = 5,
47
  loading_ctx_manager = None,
48
- decorator: callable = None
49
  ):
50
 
51
 
@@ -68,27 +70,57 @@ class LiteratureResearchTool:
68
  end_year: int,
69
  best_k: int = 5
70
  ) -> (ClusterList,ArticleList):
71
- if platforn_name == 'IEEE':
72
- articles = ArticleList.parse_ieee_articles(self.literature_search.ieee(query,start_year,end_year,num_papers)) # ArticleList
73
- abstracts = articles.getAbstracts() # List[str]
 
 
 
 
 
 
 
 
 
74
  clusters = self.cluster_pipeline(abstracts, best_k=best_k)
75
  clusters = self.__postprocess_clusters__(clusters)
76
- return clusters,articles
77
- elif platforn_name == 'Arxiv':
 
 
 
 
 
 
78
  articles = ArticleList.parse_arxiv_articles(
79
- self.literature_search.arxiv(query, num_papers)) # ArticleList
80
  abstracts = articles.getAbstracts() # List[str]
81
- clusters = self.cluster_pipeline(abstracts,best_k=best_k)
82
  clusters = self.__postprocess_clusters__(clusters)
83
  return clusters, articles
84
- elif platforn_name == 'Paper with Code':
 
 
 
 
 
 
85
  articles = ArticleList.parse_pwc_articles(
86
- self.literature_search.paper_with_code(query, num_papers)) # ArticleList
87
  abstracts = articles.getAbstracts() # List[str]
88
- clusters = self.cluster_pipeline(abstracts,best_k=best_k)
89
  clusters = self.__postprocess_clusters__(clusters)
90
  return clusters, articles
91
 
 
 
 
 
 
 
 
 
 
92
 
93
 
94
 
 
3
  import textdistance as td
4
  from .utils import UnionFind, ArticleList
5
  from .academic_query import AcademicQuery
6
+ import streamlit as st
7
+ from tokenizers import Tokenizer
8
+
9
 
10
  class LiteratureResearchTool:
11
  def __init__(self, cluster_config: Configuration = None):
 
48
  platforms: List[str] = ['IEEE', 'Arxiv', 'Paper with Code'],
49
  best_k: int = 5,
50
  loading_ctx_manager = None,
 
51
  ):
52
 
53
 
 
70
  end_year: int,
71
  best_k: int = 5
72
  ) -> (ClusterList,ArticleList):
73
+
74
+ @st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
75
+ def ieee_process(
76
+ query: str,
77
+ num_papers: int,
78
+ start_year: int,
79
+ end_year: int,
80
+ best_k: int = 5
81
+ ):
82
+ articles = ArticleList.parse_ieee_articles(
83
+ self.literature_search.ieee(query, start_year, end_year, num_papers)) # ArticleList
84
+ abstracts = articles.getAbstracts() # List[str]
85
  clusters = self.cluster_pipeline(abstracts, best_k=best_k)
86
  clusters = self.__postprocess_clusters__(clusters)
87
+ return clusters, articles
88
+
89
+ @st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
90
+ def arxiv_process(
91
+ query: str,
92
+ num_papers: int,
93
+ best_k: int = 5
94
+ ):
95
  articles = ArticleList.parse_arxiv_articles(
96
+ self.literature_search.arxiv(query, num_papers)) # ArticleList
97
  abstracts = articles.getAbstracts() # List[str]
98
+ clusters = self.cluster_pipeline(abstracts, best_k=best_k)
99
  clusters = self.__postprocess_clusters__(clusters)
100
  return clusters, articles
101
+
102
+ @st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__})
103
+ def pwc_process(
104
+ query: str,
105
+ num_papers: int,
106
+ best_k: int = 5
107
+ ):
108
  articles = ArticleList.parse_pwc_articles(
109
+ self.literature_search.paper_with_code(query, num_papers)) # ArticleList
110
  abstracts = articles.getAbstracts() # List[str]
111
+ clusters = self.cluster_pipeline(abstracts, best_k=best_k)
112
  clusters = self.__postprocess_clusters__(clusters)
113
  return clusters, articles
114
 
115
+ if platforn_name == 'IEEE':
116
+ return ieee_process(query,num_papers,start_year,end_year,best_k)
117
+ elif platforn_name == 'Arxiv':
118
+ return arxiv_process(query,num_papers,best_k)
119
+ elif platforn_name == 'Paper with Code':
120
+ return pwc_process(query,num_papers,best_k)
121
+ else:
122
+ raise RuntimeError('This platform is not supported. Please open an issue on the GitHub.')
123
+
124
 
125
 
126
 
widgets/body.py CHANGED
@@ -60,8 +60,9 @@ def render_body(platforms, num_papers, num_papers_preview, query_input, show_pre
60
 
61
 
62
  # lrt results
63
- generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k,loading_ctx_manager= st.spinner,
64
- decorator= st.cache)
 
65
  for plat in platforms:
66
  clusters, articles = next(generator)
67
  print(clusters)
 
60
 
61
 
62
  # lrt results
63
+ generator = baseline_lrt(query_input,num_papers,start_year,end_year,platforms, best_k=k,
64
+ # loading_ctx_manager= st.spinner,
65
+ )
66
  for plat in platforms:
67
  clusters, articles = next(generator)
68
  print(clusters)
widgets/sidebar.py CHANGED
@@ -38,13 +38,13 @@ def render_sidebar():
38
  'IEEE',
39
  # 'Google Scholar',
40
  'Arxiv',
41
- 'PaperWithCode'
42
  ], default=[
43
  # 'Elvsier',
44
  'IEEE',
45
  # 'Google Scholar',
46
  'Arxiv',
47
- 'PaperWithCode'
48
  ])
49
 
50
 
 
38
  'IEEE',
39
  # 'Google Scholar',
40
  'Arxiv',
41
+ 'Paper with Code'
42
  ], default=[
43
  # 'Elvsier',
44
  'IEEE',
45
  # 'Google Scholar',
46
  'Arxiv',
47
+ 'Paper with Code'
48
  ])
49
 
50