svjack committed on
Commit
67edfa5
1 Parent(s): 846b7bf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_from_disk, load_dataset
2
+ import pandas as pd
3
+ import os
4
+ import gradio as gr
5
+
6
# Retrieval corpus: the "train" split of the hosted dialogue dataset. A FAISS
# index is built over its "embeddings" column so nearest-neighbour lookups
# can be served by `get_nearest_examples`.
ds_with_embeddings = load_dataset(
    "svjack/bloom-dialogue-generate-ds-zh",
    split="train",
)
ds_with_embeddings.add_faiss_index(column="embeddings")

from sentence_transformers import SentenceTransformer

# Sentence encoder used to embed incoming queries.
# NOTE(review): presumably the same model that produced the stored
# "embeddings" column -- confirm against the dataset card.
encoder = SentenceTransformer("sentence-transformers/LaBSE")
10
+
11
def retrieve_search_df(question="今天天气不错。", top_k=10):
    """Return the ``top_k`` indexed examples nearest to ``question``.

    The question is embedded with the module-level ``encoder`` and looked up
    in the "embeddings" index of ``ds_with_embeddings``.

    Args:
        question: Query text to embed and search for.
        top_k: Number of nearest examples to request from the index.

    Returns:
        A ``pandas.DataFrame`` with one row per retrieved example and the
        columns ``question``, ``dialogue_text``, ``dialogue``, ``repo`` and
        ``scores`` (the score the index reported for that example).
    """
    query_vector = encoder.encode(question)
    hit_scores, hits = ds_with_embeddings.get_nearest_examples(
        "embeddings", query_vector, k=top_k
    )
    # `hits` maps column name -> list of values; attach the scores alongside.
    frame = pd.DataFrame(hits).assign(scores=hit_scores)
    wanted_columns = ["question", "dialogue_text", "dialogue", "repo", "scores"]
    return frame[wanted_columns]
17
+
18
# Example inputs offered in the UI: each row is [prefix text, top-k value],
# matching the positional arguments of `demo_func`.
example_sample = [["今天天气不错。", 3], ["你想吃点什么?", 5]]
22
+
23
def demo_func(prefix, max_length):
    """Search the indexed dialogues for ``prefix`` and package them as JSON.

    Args:
        prefix: Query text forwarded to ``retrieve_search_df``.
        max_length: Requested number of results (the "Top K" field). Values
            below 3 are raised to 3. ``None`` -- which the Gradio Number
            component passes when the field is cleared -- also falls back
            to 3 instead of raising ``TypeError``.

    Returns:
        A dict with the single key ``"Dialogue Context"`` mapping to a list
        of ``[dialogue, repo]`` pairs, one per retrieved example.
    """
    min_results = 3
    if max_length is None:
        # An emptied number field arrives as None; int(None) would crash.
        top_k = min_results
    else:
        top_k = max(int(max_length), min_results)

    hits = retrieve_search_df(prefix, top_k)
    # `.values.tolist()` always yields a plain list of rows, so no extra
    # type assertion is needed before returning it as JSON.
    pairs = hits[["dialogue", "repo"]].values.tolist()
    return {"Dialogue Context": pairs}
30
+
31
# Gradio UI: a text prefix plus a "Top K" number go into `demo_func`, and the
# resulting dict is rendered as JSON.
demo = gr.Interface(
    fn=demo_func,
    inputs=[
        gr.Text(label="Prefix"),
        gr.Number(label="Top K", value=10),
    ],
    outputs="json",
    title="Bloom and GPT Chinese Daliy Dialogue Generator 🌸🐰 sample search demonstration",
    description='This _example_ was **drive** from <br/><b><h4>[https://github.com/svjack/Daliy-Dialogue](https://github.com/svjack/Daliy-Dialogue)</h4></b>\n',
    # An empty example list is passed as None.
    examples=example_sample or None,
    cache_examples=False,
)

# Host and port are left as None, i.e. no explicit override is supplied here.
demo.launch(server_name=None, server_port=None)