v1.1.0
- app.py +2 -3
- lrt/clustering/clustering_pipeline.py +16 -13
- lrt/clustering/config.py +1 -1
- lrt/lrt.py +10 -13
- lrt/utils/dimension_reduction.py +17 -0
- lrt/utils/functions.py +5 -1
- lrt_instance/instances.py +2 -1
- scripts/tests/lrt_test_run.py +1 -1
- setup.py +1 -1
- widgets/body.py +16 -3
- widgets/sidebar.py +28 -8
app.py
CHANGED
@@ -1,12 +1,11 @@
 import streamlit as st
 from widgets import *
-from lrt_instance import *


 # [![github](https://img.kookapp.cn/assets/2022-09/1w4G0FIWGK00w00w.png)](https://github.com/Mondkuchen/idp_LiteratureResearch_Tool)

 # sidebar content
-platforms, number_papers,start_year,end_year,
+platforms, number_papers,start_year,end_year, clustering_params = render_sidebar()

 # body head
 with st.form("my_form",clear_on_submit=False):
@@ -26,7 +25,7 @@ with st.form("my_form",clear_on_submit=False):

 if submitted:
     # body
-    render_body(platforms, number_papers, 5, query_input, show_preview,start_year,end_year,
+    render_body(platforms, number_papers, 5, query_input, show_preview,start_year,end_year, clustering_params)
     # '''
     # bar = (
     #     Bar()
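Summary of the app-level change: render_sidebar() now returns a fifth value, a clustering_params dict, and render_body() takes it as a new trailing argument. A sketch of the new contract (runs inside the Streamlit app; the sample dict values are illustrative, with keys defined in widgets/sidebar.py below):

    from widgets import *  # render_sidebar, render_body

    platforms, number_papers, start_year, end_year, clustering_params = render_sidebar()
    # clustering_params is a dict like {'dimension_reduction': 'pca', 'max_k': 7}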
lrt/clustering/clustering_pipeline.py
CHANGED
@@ -1,9 +1,9 @@
 from typing import List
 from .config import BaselineConfig, Configuration
 from ..utils import __create_model__
-
+import numpy as np
 from sklearn.cluster import KMeans
-
+from yellowbrick.cluster import KElbowVisualizer
 from .clusters import ClusterList

 class ClusterPipeline:
@@ -15,7 +15,7 @@ class ClusterPipeline:

     def __setup__(self, config:Configuration):
         self.PTM = __create_model__(config.plm)
-        self.dimension_reduction = __create_model__(config.dimension_reduction)
+        self.dimension_reduction = __create_model__(config.dimension_reduction)
         self.clustering = __create_model__(config.clustering)
         self.keywords_extraction = __create_model__(config.keywords_extraction)

@@ -38,9 +38,11 @@ class ClusterPipeline:
         if self.dimension_reduction is None:
             return embeddings
         print(f'>>> start dimension reduction...')
+        embeddings = self.dimension_reduction.dimension_reduction(embeddings)
         print(f'>>> finished dimension reduction...')
+        return embeddings

-    def __3_clustering__(self, embeddings, return_cluster_centers = False,
+    def __3_clustering__(self, embeddings, return_cluster_centers = False, max_k: int =10):
         '''

         :param embeddings: Nxd
@@ -51,13 +53,14 @@ class ClusterPipeline:
         else:
             print(f'>>> start clustering...')
             model = KMeans()
-
-
-
-
-
-            #
-
+            visualizer = KElbowVisualizer(
+                model, k=(2, max_k+1), metric='silhouette', timings=False, locate_elbow=False
+            )
+
+            visualizer.fit(embeddings)
+            # visualizer.show()
+            best_k = visualizer.k_values_[np.argmax(np.array(visualizer.k_scores_))]
+            print(f'>>> The best K is {best_k}.')

             labels, cluster_centers = self.clustering(embeddings, k=best_k)
             clusters = ClusterList(best_k)
@@ -90,11 +93,11 @@ class ClusterPipeline:
         return clusters


-    def __call__(self, documents: List[str],
+    def __call__(self, documents: List[str], max_k:int):
         print(f'>>> pipeline starts...')
         x = self.__1_generate_word_embeddings__(documents)
         x = self.__2_dimenstion_reduction__(x)
-        clusters = self.__3_clustering__(x,
+        clusters = self.__3_clustering__(x,max_k=max_k)
         outputs = self.__4_keywords_extraction__(clusters, documents)
         print(f'>>> pipeline finished!\n')
         return outputs
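The substantive change in this file: instead of clustering with a fixed best_k, the pipeline now sweeps k from 2 to max_k and keeps the k with the highest silhouette score, using yellowbrick's KElbowVisualizer as the grid search. A self-contained sketch of that selection rule on toy data (assumes scikit-learn and yellowbrick are installed; the blob data is illustrative):

    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.datasets import make_blobs
    from yellowbrick.cluster import KElbowVisualizer

    # Toy stand-in for the Nxd sentence embeddings the pipeline produces.
    X, _ = make_blobs(n_samples=200, centers=4, n_features=16, random_state=0)

    visualizer = KElbowVisualizer(
        KMeans(), k=(2, 11), metric='silhouette', timings=False, locate_elbow=False
    )
    visualizer.fit(X)

    # Same rule as the diff: argmax over the silhouette scores across k.
    best_k = visualizer.k_values_[np.argmax(np.array(visualizer.k_scores_))]
    print(best_k)  # expect 4 on this toy data

Since metric='silhouette' is a higher-is-better score, taking the argmax is the right reduction; locate_elbow=False matters because the knee-point heuristic targets distortion-style curves rather than silhouette scores.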
lrt/clustering/config.py
CHANGED
@@ -8,4 +8,4 @@ class Configuration:

 class BaselineConfig(Configuration):
     def __init__(self):
-        super().__init__('''all-mpnet-base-v2''', 'none', 'kmeans-euclidean', 'keyphrase-transformer')
+        super().__init__('''all-mpnet-base-v2''', 'none', 'kmeans-euclidean', 'keyphrase-transformer')
lrt/lrt.py
CHANGED
@@ -46,8 +46,8 @@ class LiteratureResearchTool:
             num_papers: int,
             start_year: int,
             end_year: int,
+            max_k: int,
             platforms: List[str] = ['IEEE', 'Arxiv', 'Paper with Code'],
-            best_k: int = 5,
             loading_ctx_manager = None,
             ):

@@ -55,9 +55,9 @@ class LiteratureResearchTool:
         for platform in platforms:
             if loading_ctx_manager:
                 with loading_ctx_manager():
-                    clusters, articles = self.__platformPipeline__(platform,query,num_papers,start_year,end_year,
+                    clusters, articles = self.__platformPipeline__(platform,query,num_papers,start_year,end_year,max_k)
             else:
-                clusters, articles = self.__platformPipeline__(platform, query, num_papers, start_year, end_year,
+                clusters, articles = self.__platformPipeline__(platform, query, num_papers, start_year, end_year,max_k)

             clusters.sort()
             yield clusters,articles
@@ -69,7 +69,7 @@ class LiteratureResearchTool:
             num_papers: int,
             start_year: int,
             end_year: int,
-
+            max_k: int
     ) -> (ClusterList,ArticleList):

         @st.cache(hash_funcs={Tokenizer: Tokenizer.__hash__},allow_output_mutation=True)
@@ -78,12 +78,11 @@ class LiteratureResearchTool:
             num_papers: int,
             start_year: int,
             end_year: int,
-            best_k: int = 5
         ):
             articles = ArticleList.parse_ieee_articles(
                 self.literature_search.ieee(query, start_year, end_year, num_papers)) # ArticleList
             abstracts = articles.getAbstracts() # List[str]
-            clusters = self.cluster_pipeline(abstracts,
+            clusters = self.cluster_pipeline(abstracts,max_k)
             clusters = self.__postprocess_clusters__(clusters)
             return clusters, articles

@@ -91,12 +90,11 @@ class LiteratureResearchTool:
         def arxiv_process(
             query: str,
             num_papers: int,
-            best_k: int = 5
         ):
             articles = ArticleList.parse_arxiv_articles(
                 self.literature_search.arxiv(query, num_papers)) # ArticleList
             abstracts = articles.getAbstracts() # List[str]
-            clusters = self.cluster_pipeline(abstracts,
+            clusters = self.cluster_pipeline(abstracts,max_k)
             clusters = self.__postprocess_clusters__(clusters)
             return clusters, articles

@@ -104,21 +102,20 @@ class LiteratureResearchTool:
         def pwc_process(
             query: str,
             num_papers: int,
-            best_k: int = 5
         ):
             articles = ArticleList.parse_pwc_articles(
                 self.literature_search.paper_with_code(query, num_papers)) # ArticleList
             abstracts = articles.getAbstracts() # List[str]
-            clusters = self.cluster_pipeline(abstracts,
+            clusters = self.cluster_pipeline(abstracts,max_k)
             clusters = self.__postprocess_clusters__(clusters)
             return clusters, articles

         if platforn_name == 'IEEE':
-            return ieee_process(query,num_papers,start_year,end_year
+            return ieee_process(query,num_papers,start_year,end_year)
         elif platforn_name == 'Arxiv':
-            return arxiv_process(query,num_papers
+            return arxiv_process(query,num_papers)
         elif platforn_name == 'Paper with Code':
-            return pwc_process(query,num_papers
+            return pwc_process(query,num_papers)
         else:
             raise RuntimeError('This platform is not supported. Please open an issue on the GitHub.')
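API note: the fixed best_k: int = 5 is gone, and max_k is now a required parameter inserted before platforms, so positional callers need updating. A hypothetical call against the new signature (the leading query parameter is not visible in the hunk but is implied by the call sites):

    from lrt import LiteratureResearchTool

    lrt = LiteratureResearchTool()
    # __call__ is a generator: one (ClusterList, ArticleList) pair per platform,
    # with clusters already sorted before each yield.
    for clusters, articles in lrt('graph neural networks', 50, 2018, 2022,
                                  max_k=10, platforms=['Arxiv']):
        pass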
lrt/utils/dimension_reduction.py
ADDED
@@ -0,0 +1,17 @@
+from sklearn.decomposition import PCA as pca
+
+
+class BaseDimensionReduction:
+    def dimension_reduction(self,X):
+        raise NotImplementedError()
+
+class PCA(BaseDimensionReduction):
+    def __init__(self, n_components: int = 0.8, *args, **kwargs) -> None:
+        super().__init__()
+        self.pca = pca(n_components,*args,**kwargs)
+
+
+    def dimension_reduction(self, X):
+        self.pca.fit(X=X)
+        print(f'>>> The reduced dimension is {self.pca.n_components_}.')
+        return self.pca.transform(X)
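One thing worth knowing about the new module: scikit-learn's PCA interprets a float n_components in (0, 1) as a variance budget, picking the smallest number of components whose cumulative explained variance reaches it. So despite the int type hint, the default 0.8 keeps roughly 80% of the variance and the output dimension is data-dependent, which is why dimension_reduction prints n_components_ after fitting. A quick standalone check:

    import numpy as np
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 64))  # stand-in for 100 sentence embeddings

    pca = PCA(n_components=0.8)     # keep ~80% of the variance
    X_red = pca.fit_transform(X)
    print(pca.n_components_, X_red.shape)  # chosen dimension depends on X

Also note that the class fits and transforms on the same batch each call, so the projection is re-learned per query rather than persisted.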
lrt/utils/functions.py
CHANGED
@@ -5,6 +5,7 @@ import torch
 from sklearn.cluster import KMeans
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,Text2TextGenerationPipeline
 from inference_hf import InferenceHF
+from .dimension_reduction import PCA

 class Template:
     def __init__(self):
@@ -14,7 +15,7 @@ class Template:
             'all-mpnet-base-v2':'''sentence-transformers/all-mpnet-base-v2'''
         }
         self.dimension_reduction = {
-            'pca': None,
+            'pca': PCA,
             'vae': None,
             'cnn': None
         }
@@ -55,6 +56,9 @@ def __create_model__(model_ckpt):
             )
             return tmp[0].cpu().detach().numpy(), tmp[1].cpu().detach().numpy()
         return ret
+    elif model_ckpt == 'pca':
+        pca = template.dimension_reduction[model_ckpt](0.8)
+        return pca

     elif model_ckpt =='kmeans-euclidean':
         def ret(x,k):
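__create_model__ is a string-keyed factory, and its branches return different kinds of objects: the new 'pca' branch returns a PCA instance (the pipeline calls its .dimension_reduction(X) method), while 'kmeans-euclidean' returns a plain function. A trimmed sketch of that dispatch; the 'none' branch returning None is an assumption inferred from the `is None` check in clustering_pipeline.py:

    from lrt.utils.dimension_reduction import PCA

    def create_model(model_ckpt: str):
        # Simplified stand-in for lrt.utils.functions.__create_model__.
        if model_ckpt == 'pca':
            return PCA(0.8)  # object exposing .dimension_reduction(X)
        if model_ckpt == 'none':
            return None      # assumed: the pipeline skips reduction on None
        raise NotImplementedError(model_ckpt)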
lrt_instance/instances.py
CHANGED
@@ -1,3 +1,4 @@
 from lrt import LiteratureResearchTool
+from lrt.clustering.config import *

-baseline_lrt = LiteratureResearchTool()
+baseline_lrt = LiteratureResearchTool()
scripts/tests/lrt_test_run.py
CHANGED
@@ -10,7 +10,7 @@ if __name__ == '__main__':
     from lrt.utils import ArticleList
     config = Configuration(
         plm= 'all-mpnet-base-v2',
-        dimension_reduction='none',
+        dimension_reduction='pca',
         clustering='kmeans-euclidean',
         # keywords_extraction='KeyBartAdapter'
         keywords_extraction= 'keyphrase-transformer'
setup.py
CHANGED
@@ -21,7 +21,7 @@ requirements = [

 setup(
     name="LiteratureResearchTool",
-    version="1.
+    version="1.1.0",
     author="Tao Xiang",
     author_email="tao.xiang@tum.de",
     description="A tool for literature research and analysis",
widgets/body.py
CHANGED
@@ -1,7 +1,8 @@
 import streamlit as st
 from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
 from lrt.clustering.clusters import SingleCluster
-from lrt import
+from lrt.clustering.config import Configuration
+from lrt import ArticleList, LiteratureResearchTool
 from lrt_instance import *
 # from pyecharts.charts import Bar
 # from pyecharts import options as opts
@@ -54,7 +55,7 @@ We have found following papers for you! (displaying 5 papers for each literature

 paperInGeneral.markdown(paperInGeneral_md)

-def render_body(platforms, num_papers, num_papers_preview, query_input, show_preview:bool,start_year,end_year,
+def render_body(platforms, num_papers, num_papers_preview, query_input, show_preview:bool,start_year,end_year, clustering_params: dict):

     tmp = st.empty()
     if query_input != '':
@@ -66,7 +67,19 @@ def render_body(platforms, num_papers, num_papers_preview, query_input, show_pre


     # lrt results
-
+    ## baseline
+    if clustering_params['dimension_reduction'] == 'none':
+        model = baseline_lrt
+    else:
+        config = Configuration(
+            plm= '''all-mpnet-base-v2''',
+            dimension_reduction= clustering_params['dimension_reduction'],
+            clustering= 'kmeans-euclidean',
+            keywords_extraction='keyphrase-transformer'
+        )
+        model = LiteratureResearchTool(config)
+
+    generator = model(query_input,num_papers,start_year,end_year,max_k=clustering_params['max_k'],platforms=platforms)
     for i,plat in enumerate(platforms):
         clusters, articles = next(generator)
         st.markdown(f'''# {i+1} {plat} Results''')
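Design note: render_body keeps using the shared baseline_lrt singleton when the user selects no dimension reduction, and only builds a fresh Configuration/LiteratureResearchTool for the 'pca' path. A hypothetical helper showing the same selection logic in isolation (pick_model is not in the codebase):

    from lrt import LiteratureResearchTool
    from lrt.clustering.config import Configuration
    from lrt_instance import baseline_lrt

    def pick_model(clustering_params: dict) -> LiteratureResearchTool:
        # Hypothetical helper mirroring the branch added in render_body.
        if clustering_params['dimension_reduction'] == 'none':
            return baseline_lrt  # shared module-level instance
        return LiteratureResearchTool(Configuration(
            plm='all-mpnet-base-v2',
            dimension_reduction=clustering_params['dimension_reduction'],
            clustering='kmeans-euclidean',
            keywords_extraction='keyphrase-transformer',
        ))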
widgets/sidebar.py
CHANGED
@@ -3,6 +3,12 @@ import datetime
 # from .utils import PACKAGE_ROOT

 def render_sidebar():
+    icons = f'''
+    <center>
+    <a href="https://github.com/Mondkuchen/idp_LiteratureResearch_Tool"><img src = "https://cdn-icons-png.flaticon.com/512/733/733609.png" width="23"></img></a> <a href="mailto:xiang.tao@outlook.de"><img src="https://cdn-icons-png.flaticon.com/512/646/646094.png" alt="email" width = "27" ></a>
+    </center>
+    '''
+
     sidebar_markdown = f'''

 <center>
@@ -14,16 +20,14 @@ def render_sidebar():


 <code>
-v1.
+v1.1.0
 </code>


 </center>


-
-<a href="https://github.com/Mondkuchen/idp_LiteratureResearch_Tool"><img src = "https://cdn-icons-png.flaticon.com/512/733/733609.png" width="23"></img></a> <a href="mailto:xiang.tao@outlook.de"><img src="https://cdn-icons-png.flaticon.com/512/646/646094.png" alt="email" width = "27" ></a>
-</center>
+{icons}

 ---
@@ -50,7 +54,7 @@ def render_sidebar():


     st.sidebar.markdown('## Choose the max number of papers to search')
-    number_papers=st.sidebar.slider('number',
+    number_papers=st.sidebar.slider('number', 10, 200, 20, 5)

     st.sidebar.markdown('## Choose the start year of publication')
     this_year = datetime.date.today().year
@@ -59,7 +63,23 @@ def render_sidebar():
     st.sidebar.markdown('## Choose the end year of publication')
     end_year = st.sidebar.slider('year end:', 2000, this_year, this_year, 1)

-
-
+
+    with st.sidebar:
+        st.markdown('## Adjust clustering hyperparameters')
+        with st.expander('Clustering Hyperparameters'):
+            dr = st.selectbox('1) Dimension Reduction', options=['none', 'pca'], index=0)
+            tmp = min(number_papers,15)
+            max_k = st.slider('2) Max number of clusters', 2,tmp , tmp//2)
+
+
+        st.markdown('---')
+        st.markdown(icons,unsafe_allow_html=True)
+        st.markdown('''<center>copyright@2022</center>''',unsafe_allow_html=True)
+
+        # st.sidebar.markdown('## Choose the number of clusters')
+        # k = st.sidebar.slider('number',1,10,3)

-    return platforms, number_papers, start_year, end_year,
+    return platforms, number_papers, start_year, end_year, dict(
+        dimension_reduction= dr,
+        max_k = max_k
+    )
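A subtlety in the new sidebar: the max-clusters slider is capped at min(number_papers, 15) and defaults to half that cap, which keeps max_k at or below the number of retrieved abstracts (K-means needs k ≤ n samples) and bounds the cost of the silhouette sweep. The returned dict is exactly the clustering_params consumed by app.py and widgets/body.py; its shape (values illustrative):

    # Shape of the dict returned by render_sidebar() (values illustrative).
    number_papers = 20
    cap = min(number_papers, 15)
    clustering_params = dict(
        dimension_reduction='none',  # or 'pca'
        max_k=cap // 2,              # slider default: half the cap
    )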