Spaces:
Runtime error
Runtime error
File size: 1,681 Bytes
eaf119d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
from pymilvus import connections, utility, DataType, FieldSchema, CollectionSchema, Collection
from sentence_transformers import SentenceTransformer
from pyvi import ViTokenizer
import string
import json
def load_json(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed value.

    Args:
        path: Location of the JSON file to open.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
def convert_query(query):
    """Lower-case ``query`` and run it through ``ViTokenizer.tokenize``.

    Args:
        query: Raw query string.

    Returns:
        The value returned by ``ViTokenizer.tokenize`` for the lower-cased
        query.
    """
    return ViTokenizer.tokenize(query.lower())
def load_stopword(path):
    """Load a stop-word list from a UTF-8 text file, one entry per line.

    Args:
        path: Location of the stop-word file.

    Returns:
        A list with one whitespace-stripped string per line of the file, in
        file order. Blank lines are kept as empty strings.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return [entry.strip() for entry in handle]
def remove_stop_words(path, split_prompts):
    """Drop every token that appears in the stop-word file at ``path``.

    Args:
        path: Location of the stop-word file, read via ``load_stopword``.
        split_prompts: Iterable of string tokens to filter.

    Returns:
        A list of the surviving tokens, in input order, each with surrounding
        whitespace stripped. Membership is tested on the token as given
        (before stripping).
    """
    # Build the lookup once: a set gives constant-time membership tests,
    # instead of scanning the whole stop-word list for every token.
    stop_words = set(load_stopword(path))
    return [token.strip() for token in split_prompts if token not in stop_words]
def clean_query(path, query):
    """Tokenize ``query`` and return its unique non-stop-word tokens.

    The query is lower-cased, tokenized with ``ViTokenizer.tokenize`` and
    split on single spaces. Tokens made up only of punctuation are dropped,
    underscores inside the remaining tokens are replaced by spaces
    (presumably the tokenizer joins the syllables of a compound word with
    ``_`` -- confirm against pyvi), stop words are removed and duplicates
    are collapsed.

    Args:
        path: Location of the stop-word file passed to ``remove_stop_words``.
        query: Raw query string.

    Returns:
        A list of cleaned tokens, first occurrence order preserved.
    """
    punctuation = set(string.punctuation)
    tokens = ViTokenizer.tokenize(query.lower()).split(' ')
    # Test character by character: ``token in string.punctuation`` is a
    # substring test, so multi-character tokens such as "..." or "?!" would
    # slip through. ``all`` over an empty token is True, so empty strings
    # produced by consecutive spaces are still discarded.
    words = [
        token.replace('_', ' ')
        for token in tokens
        if not all(char in punctuation for char in token)
    ]
    kept = remove_stop_words(path, words)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(kept))
def load_model(model_name):
    """Construct a ``SentenceTransformer`` for ``model_name`` and return it.

    Args:
        model_name: Value passed straight to the ``SentenceTransformer``
            constructor.

    Returns:
        The newly created ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name)
def connect_vector_db(host='localhost', port='19530', alias='default'):
    """Register a pymilvus connection and report completion on stdout.

    Called without arguments this connects exactly as before
    (alias ``'default'`` at ``localhost:19530``); the parameters allow
    pointing at a different server without editing this module.

    Args:
        host: Host name or address handed to ``connections.connect``.
        port: Port handed to ``connections.connect``.
        alias: Connection alias handed to ``connections.connect``.
    """
    connections.connect(alias, host=host, port=port)
    print("Connect finished!")
def load_collection(collection_name):
    """Open the named collection, call ``load()`` on it and return it.

    Prints a completion message to stdout once ``load()`` has returned.

    Args:
        collection_name: Name passed to the pymilvus ``Collection``
            constructor.

    Returns:
        The ``Collection`` object after ``load()`` was called.
    """
    collection = Collection(collection_name)
    collection.load()
    print(f"{collection_name} load complete!")
    return collection