File size: 1,792 Bytes
d6585f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import argparse

import gradio as gr
from search_online import OnlineSearcher

# Result count handed to both searcher calls (search and print_result).
K = 10

# Text pre-filled into the query textbox of the demo UI.
# NOTE(review): the non-ASCII text in this literal looks mis-encoded in this
# copy of the file -- confirm the file's encoding before editing the wording.
DEFAULT_QUERY_MRTYDI = """μ‚¬μš©μž μ§ˆμ˜μ™€ κ΄€λ ¨λœ 문단듀을 Wikipedia ν•œκ΅­μ–΄ μ½”νΌμŠ€μ—μ„œ λ°˜ν™˜ν•©λ‹ˆλ‹€.\n
예λ₯Όλ“€μ–΄\n
- 졜초둜 μ „κΈ° μžλ™μ°¨λ₯Ό κ°œλ°œν•œ 기업은 μ–΄λ””μ•Ό?
- μŠ€μΏ λ²„ 닀이빙 잠수 κ°€λŠ₯ κΉŠμ΄λŠ” μ΅œλŒ€ λͺ‡λ―Έν„°μΈκ°€μš”?
- λ…μ κ·œμ œλ²• μƒμ˜ 기업결합심사기쀀은 기업결합을 μ–΄λ–»κ²Œ κ΅¬λΆ„ν•˜μ—¬ κ²½μŸμ œν•œμ„±μ„ νŒλ‹¨ν•˜λ‚˜μš”?
"""


# Manual arguments (FIXME: hard-coded here instead of parsed from the CLI).
# Build a Namespace *instance*: binding the bare `argparse.Namespace` class
# and assigning attributes to it would store these settings on the shared
# class object, where every other Namespace in the process would see them.
args = argparse.Namespace(
    index_type='hybrid',
    # Two comma-separated index paths (sparse, then dense) in one string.
    index="/root/indexes/mrtydi-korean/sparse,/root/indexes/mrtydi-korean/dense",
    encoder="castorini/mdpr-question-nq",
    device="cuda:0",
    alpha=1000000,
    normalization=True,
    lang_abbr='ko',
)

# Build the module-level searcher once, at import time, from the settings in
# `args`; Retrieve() below reuses this single instance for every query.
# The banner is printed before construction starts.
print("initialize Mr.tydi retrieval bot")
searcher = OnlineSearcher(args)

def Retrieve(query):
    """Search for ``query`` and return the searcher's formatted result.

    Args:
        query: Value passed unchanged as the first argument of
            ``searcher.search``.

    Returns:
        Whatever ``searcher.print_result`` returns for the hits, using the
        module-level ``K`` as the count in both calls.
    """
    top_hits = searcher.search(query, K)
    return searcher.print_result(top_hits, K)

# Gradio demo: one text box in, one text box out, wired to Retrieve().
_query_box = gr.inputs.Textbox(
    default=DEFAULT_QUERY_MRTYDI,
    label="질의",
)
# NOTE(review): the output component is also created through gr.inputs.Textbox
# (not an output-specific class) -- confirm this is what the installed Gradio
# version expects.
_result_box = gr.inputs.Textbox(
    default="κ²€μƒ‰λœ 문단듀과 점수λ₯Ό 좜λ ₯ν•©λ‹ˆλ‹€",
    label="검색 κ²°κ³Ό",
)

demo = gr.Interface(
    fn=Retrieve,
    inputs=[_query_box],
    outputs=[_result_box],
    title="Hybrid (term + neural) 검색",
    theme='dark-grass',
    # Plain literal: the text contains no replacement fields.
    description="νŠΉμ • μ§€μ‹λ² μ΄μŠ€μ— λŒ€ν•΄ ν•™μŠ΅ν•œ 검색 μ‹œμŠ€ν…œμ„ ν…ŒμŠ€νŠΈν•©λ‹ˆλ‹€.\nμ§€μ‹λ² μ΄μŠ€μ— λ§žμΆ”μ–΄ μž¬ν•™μŠ΅μ΄ κ°€λŠ₯ν•˜λ©°, ν˜„μž¬ 데λͺ¨μ—μ„œλŠ” Wikipedia ν•œκ΅­μ–΄ μ½”νΌμŠ€ (2020 dump, mr.Tydi ver, #문단=1496126)에 λŒ€ν•΄ ν•™μŠ΅ν•œ λͺ¨λΈμ„ μ‚¬μš©ν•©λ‹ˆλ‹€. \n\n neural: castorini/mdpr-[passage,question]-nq, term: BM25",
)

# Runs at import time; share=True is passed through to Gradio's launch().
demo.launch(share=True)