Spaces:

haoqi7
/

research

Runtime error

File size: 5,529 Bytes

16188ba

import streamlit as st
from api_ import ArxivQuery, IEEEQuery, PaperWithCodeQuery
from lrt.clustering.clusters import SingleCluster
from lrt.clustering.config import Configuration
from lrt import ArticleList, LiteratureResearchTool
from lrt_instance import *
# from pyecharts.charts import Bar
# from pyecharts import options as opts
# import streamlit.components.v1 as st_render
# from .utils import generate_html_pyecharts
from .charts import build_bar_charts

def _preview_table_md(heading, papers, year_key, limit):
    """Build the markdown for one platform's preview table.

    Args:
        heading: Markdown emitted before the table header (the ``## ...`` line,
            including any leading/trailing newlines).
        papers: Query result supporting ``len()`` and integer indexing; each
            item is read via ``item['title']`` and ``item[year_key]``.
        year_key: Key of the field shown in the "Publication Year" column.
        limit: Maximum number of rows to emit.

    Returns:
        The heading, the table header and at most ``limit`` rows as one string.
    """
    parts = [
        heading,
        '| ID| Paper Title | Publication Year |\n',
        '| -------- | -------- | -------- |\n',
    ]
    # Index access (rather than slicing) is kept on purpose: only len() and
    # integer indexing are known to be supported by the query results.
    for idx in range(min(len(papers), limit)):
        paper = papers[idx]
        # Newlines inside a cell would break the markdown table row.
        title = str(paper['title']).replace('\n', ' ')
        year = str(paper[year_key]).replace('\n', ' ')
        parts.append(f'|{idx + 1}|{title}|{year}|\n')
    return ''.join(parts)


def __preview__(platforms, num_papers, num_papers_preview, query_input,start_year,end_year):
    """Render a markdown preview of the first query hits for each platform.

    For every supported name found in ``platforms`` ('IEEE', 'Arxiv',
    'Paper with Code') the matching query class is called and a table with
    up to ``num_papers_preview`` rows is appended to a single markdown
    document, which is then written into a Streamlit placeholder.

    Each platform is limited independently: the row count is
    ``min(number of hits, num_papers_preview)`` computed from the caller's
    original ``num_papers_preview``, so a platform with few hits no longer
    shrinks the preview of the platforms rendered after it.

    Args:
        platforms: Container of platform names to query.
        num_papers: Number of papers requested from each platform.
        num_papers_preview: Maximum number of rows shown per platform.
        query_input: The search query.
        start_year: Passed to the IEEE query only.
        end_year: Passed to the IEEE query only.
    """
    with st.spinner('Searching...'):
        paperInGeneral = st.empty()  # placeholder for the paper overview
        # NOTE(review): the intro text always says "5 papers" regardless of
        # num_papers_preview; left unchanged because it is user-facing output.
        sections = ['''# 0 Query Results Preview
We have found following papers for you! (displaying 5 papers for each literature platforms)
''']
        if 'IEEE' in platforms:
            # NOTE(review): credential is hard-coded in source; it should be
            # moved to configuration / a secret store and rotated.
            IEEEQuery.__setup_api_key__('vpd9yy325enruv27zj2d353e')
            ieee = IEEEQuery.query(query_input,start_year,end_year,num_papers)
            sections.append(_preview_table_md(
                '## IEEE\n', ieee, 'publication_year', num_papers_preview))
        if 'Arxiv' in platforms:
            arxiv = ArxivQuery.query(query_input, max_results=num_papers)
            sections.append(_preview_table_md(
                '\n## Arxiv\n', arxiv, 'published', num_papers_preview))
        if 'Paper with Code' in platforms:
            pwc = PaperWithCodeQuery.query(query_input, items_per_page=num_papers)
            sections.append(_preview_table_md(
                '\n## Paper with Code\n', pwc, 'published', num_papers_preview))

        paperInGeneral.markdown(''.join(sections))

def render_body(platforms, num_papers, num_papers_preview, query_input, show_preview:bool, start_year, end_year, hyperparams: dict, standardization = False):
    """Render the main page body for a literature query.

    Does nothing beyond creating an empty placeholder when ``query_input`` is
    the empty string. Otherwise it echoes the query, optionally renders the
    preview tables, picks a model from ``hyperparams`` and, for each entry in
    ``platforms``, pulls one ``(clusters, articles)`` pair from the model's
    result iterator and renders a cluster overview chart plus per-cluster
    article tables and keyphrases.

    Args:
        platforms: Platform names; one result is consumed per entry.
        num_papers: Number of papers passed to the model / preview.
        num_papers_preview: Preview row limit forwarded to ``__preview__``.
        query_input: The search query.
        show_preview: Whether to render the preview section first.
        start_year: Forwarded to the preview and the model.
        end_year: Forwarded to the preview and the model.
        hyperparams: Reads the keys 'dimension_reduction', 'model_cpt',
            'cluster_model' and 'max_k'.
        standardization: Forwarded to the model call.
    """
    echo_slot = st.empty()
    if query_input == '':
        # No query entered: leave the body blank.
        return

    echo_slot.markdown(f'You entered query: `{query_input}`')

    # Optional preview of the raw query hits.
    if show_preview:
        __preview__(platforms, num_papers, num_papers_preview, query_input, start_year, end_year)

    # Model selection: this exact hyperparameter combination maps to the
    # ready-made `baseline_lrt` object (not defined in this block; presumably
    # provided by the star import from lrt_instance); anything else gets a
    # tool built from a fresh Configuration.
    wants_baseline = (
        hyperparams['dimension_reduction'] == 'none'
        and hyperparams['model_cpt'] == 'keyphrase-transformer'
        and hyperparams['cluster_model'] == 'kmeans-euclidean'
    )
    if wants_baseline:
        model = baseline_lrt
    else:
        model = LiteratureResearchTool(Configuration(
            plm='all-mpnet-base-v2',
            dimension_reduction=hyperparams['dimension_reduction'],
            clustering=hyperparams['cluster_model'],
            keywords_extraction=hyperparams['model_cpt'],
        ))

    results = model(
        query_input,
        num_papers,
        start_year,
        end_year,
        max_k=hyperparams['max_k'],
        platforms=platforms,
        standardization=standardization,
    )

    for section_no, platform_name in enumerate(platforms, start=1):
        # One result is pulled per platform, in the order of `platforms`.
        clusters, articles = next(results)
        st.markdown(f'# {section_no} {platform_name} Results')
        clusters.sort()

        # --- overview chart -------------------------------------------------
        st.markdown(f'## {section_no}.1 Clusters Overview')
        st.markdown('In this section we show the overview of the clusters, more specifically,')
        st.markdown('\n- the number of papers in each cluster\n- the number of keyphrases of each cluster')
        cluster_labels = [f'Cluster {n}' for n in range(1, len(clusters) + 1)]
        paper_counts = [len(c) for c in clusters]
        keyphrase_counts = [len(c.get_keyphrases()) for c in clusters]
        chart = build_bar_charts(
            x_range=cluster_labels,
            y_names=['Number of Papers', 'Number of Keyphrases'],
            y_data=[paper_counts, keyphrase_counts],
        )
        st.bokeh_chart(chart)

        # --- per-cluster details --------------------------------------------
        st.markdown(f'## {section_no}.2 Cluster Details')
        st.markdown('In this section we show the details of each cluster, including')
        st.markdown('\n- the article information in the cluster\n- the keyphrases of the cluster')
        for cluster_no, cluster in enumerate(clusters, start=1):
            assert isinstance(cluster, SingleCluster)  # TODO: remove this line
            member_ids = cluster.elements()
            members = ArticleList([articles[article_id] for article_id in member_ids])
            st.markdown(f'**Cluster {cluster_no}**')
            st.dataframe(members.getDataFrame())
            st.markdown('The top 5 keyphrases of this cluster are:')
            bullet_list = ''.join(
                f'- `{phrase}`\n' for phrase in cluster.top_5_keyphrases
            )
            st.markdown(bullet_list)