"""Gradio demo: NBDT Recommendation Engine for Editors.

Loads three FAISS vector stores (one per embedding model) and serves a small
web UI that, for a pasted abstract, lists similar abstracts, the journals
those abstracts appeared in, and their first authors.
"""
import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import torch


def get_matches1(query):
    """Return the 60 nearest ``(document, score)`` pairs from ``vecdb1``."""
    return vecdb1.similarity_search_with_score(query, k=60)


def get_matches2(query):
    """Return the 60 nearest ``(document, score)`` pairs from ``vecdb2``."""
    return vecdb2.similarity_search_with_score(query, k=60)


def get_matches3(query):
    """Return the 60 nearest ``(document, score)`` pairs from ``vecdb3``."""
    return vecdb3.similarity_search_with_score(query, k=60)


def inference(query, model=1):
    """Search one vector store and build the three result tables.

    Args:
        query: Text to search for (the UI passes the abstract textbox value).
        model: ``1`` or ``2`` select ``vecdb1`` / ``vecdb2``; any other value
            selects ``vecdb3``.

    Returns:
        A list ``[abstracts_update, journals_update, authors_update]`` of
        ``gr.Dataframe.update`` payloads, in the order the click handlers
        list their outputs.
    """
    if model == 1:
        matches = get_matches1(query)
    elif model == 2:
        matches = get_matches2(query)
    else:
        matches = get_matches3(query)

    auth_counts = {}  # author -> rows already added to the authors table
    j_bucket = {}     # journal -> sum of normalised scores of its matches
    n_table = []      # authors table rows
    a_table = []      # abstracts table rows

    raw_scores = [round(match[1].item(), 3) for match in matches]
    # ``default`` keeps an empty result set from raising ValueError.
    min_score = min(raw_scores, default=0.0)
    max_score = max(raw_scores, default=0.0)

    def normaliser(x):
        """Map a raw score so that the lowest raw score becomes 1.0."""
        if max_score == 0:
            # Degenerate case: the formula below would divide by zero.
            return 1.0
        return round(1 - (x - min_score) / max_score, 3)

    for i, (match, raw_score) in enumerate(zip(matches, raw_scores)):
        meta = match[0].metadata
        score = normaliser(raw_score)

        title = meta['title']
        author = meta['authors'][0].title()
        date = meta.get('date', 'None')
        link = meta.get('link', 'None')
        submitter = meta.get('submitter', 'None')
        # A missing, None or blank journal is filed under the 'None' bucket.
        journal = (meta.get('journal') or '').strip() or 'None'

        # For journals: accumulate the score of every match per journal.
        j_bucket[journal] = j_bucket.get(journal, 0) + score

        # For authors: keep at most two rows per author.
        seen = auth_counts.get(author, 0)
        if seen < 2:
            n_table.append([i + 1, score, author, title, link, date])
            auth_counts[author] = seen + 1

        # For abstracts: every match gets a row.
        a_table.append(
            [i + 1, title, author, submitter, journal, date, link, score]
        )

    # Matches without a journal must not appear in the journal ranking.
    # ``pop`` with a default avoids a KeyError when every match had one.
    j_bucket.pop('None', None)

    ranked = sorted(
        ([name, round(total, 3)] for name, total in j_bucket.items()),
        key=lambda row: row[1],
        reverse=True,
    )
    j_table = [[pos + 1, name, total] for pos, (name, total) in enumerate(ranked)]

    j_update = gr.Dataframe.update(value=j_table, visible=True)
    n_update = gr.Dataframe.update(value=n_table, visible=True)
    a_update = gr.Dataframe.update(value=a_table, visible=True)
    return [a_update, j_update, n_update]


def inference1(query):
    """Run :func:`inference` against the first vector store."""
    return inference(query, 1)


def inference2(query):
    """Run :func:`inference` against the second vector store."""
    return inference(query, 2)


def inference3(query):
    """Run :func:`inference` against the third vector store."""
    return inference(query, 3)


model1_name = "biodatlab/MIReAD-Neuro-Large"
model2_name = "biodatlab/MIReAD-Neuro-Contrastive"
model3_name = "biodatlab/SciBERT-Neuro-Contrastive"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

faiss_embedder1 = HuggingFaceEmbeddings(
    model_name=model1_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
faiss_embedder2 = HuggingFaceEmbeddings(
    model_name=model2_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
faiss_embedder3 = HuggingFaceEmbeddings(
    model_name=model3_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# Each index is loaded with the embedder for the model named above it.
vecdb1 = FAISS.load_local("miread_large", faiss_embedder1)
vecdb2 = FAISS.load_local("miread_contrastive", faiss_embedder2)
vecdb3 = FAISS.load_local("scibert_contrastive", faiss_embedder3)

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown(
        "NBDT Recommendation Engine for Editors is a tool for neuroscience "
        "authors/abstracts/journals recommendation built for NBDT journal editors. "
        "It aims to help an editor to find similar reviewers, abstracts, and "
        "journals to a given submitted abstract. "
        'To find a recommendation, paste a `title[SEP]abstract` or `abstract` '
        'in the text box below and click "Find Matches". '
        "Then, you can hover to authors/abstracts/journals tab to find a "
        "suggested list. "
        "The data in our current demo includes authors associated with the "
        "NBDT Journal. "
        "We will update the data monthly for an up-to-date publications."
    )

    abst = gr.Textbox(label="Abstract", lines=10)

    action1_btn = gr.Button(value="Find Matches with MIReAD-Neuro-Large")
    action2_btn = gr.Button(value="Find Matches with MIReAD-Neuro-Contrastive")
    action3_btn = gr.Button(value="Find Matches with SciBERT-Neuro-Contrastive")

    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False
        )

    # NOTE(review): all three handlers register the same api_name; confirm
    # how the installed Gradio version resolves duplicate endpoint names.
    action1_btn.click(
        fn=inference1,
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane"
    )
    action2_btn.click(
        fn=inference2,
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane"
    )
    action3_btn.click(
        fn=inference3,
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane"
    )

demo.launch(debug=True)