resume-ranker / core.py
zhuolisam's picture
feat:streamlit
2c14023
from embedding import embedding
from preprocessing import preprocess
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import streamlit as st
def pipeline(input_doc:str , ori_documents, embedding_type='bert'):
documents = np.array([doc['content'] for doc in ori_documents])
documents = np.insert(documents, 0, input_doc)
# st.write(documents)
preprocessed_documents = preprocess(documents)
# st.write(preprocessed_documents)
print("Encoding with BERT...")
documents_vectors = embedding(preprocessed_documents, embedding=embedding_type)
print("Encoding finished")
#compute cosine similarity
pairwise = cosine_similarity(documents_vectors)
#only retain useful information
pairwise = pairwise[0,1:]
sorted_idx = np.argsort(pairwise)[::-1]
result_pairwise = pairwise[sorted_idx]
results = []
print('Resume ranking:')
for idx in sorted_idx:
single_result = {
'rank': idx,
'name': ori_documents[idx]['name'],
'similarity': pairwise[idx].item()
}
results.append(single_result)
print(f'Resume of candidite {idx}')
print(f'Cosine Similarity: {pairwise[idx]}\n')
return results, result_pairwise