File size: 722 Bytes
000bdc7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
# Inference script: for every test query, compute the highest cosine
# similarity between the query and its candidate FAQs, threshold it with the
# value stored in the model bundle, and write the 0/1 predictions to
# submission.csv.

# Columns of test.csv that hold the candidate FAQ ids for each query.
FAQ_COLUMNS = [f"FAQ{i + 1}" for i in range(3)]

# Score assigned to a row when none of its FAQ ids is present in
# model["faq_ids"].  -inf can never exceed the threshold, so such rows are
# predicted as 0 instead of aborting the run on an empty candidate set.
NO_CANDIDATE_SCORE = float("-inf")

test_df = pd.read_csv("/tmp/data/test.csv")

# NOTE(security): unpickling can execute arbitrary code; model.pkl must come
# from a trusted source.
# The bundle is read through the keys "tokenizer", "faq_ids", "X_faq" and "thr".
with open("model.pkl", "rb") as f:
    model = pickle.load(f)

# Hoisted out of the loop: the FAQ id vector does not change between rows.
faq_ids = np.asarray(model["faq_ids"])

scores = []
for _, row in test_df.iterrows():
    # Boolean mask over the FAQ entries selecting this row's candidates
    # (presumably X_faq rows are aligned with faq_ids; the mask indexes both).
    is_cand = np.zeros(faq_ids.shape, dtype=bool)
    for col in FAQ_COLUMNS:
        is_cand |= faq_ids == row[col]

    if not is_cand.any():
        # No known candidate: skip the similarity step, which cannot handle
        # an empty candidate matrix.
        scores.append(NO_CANDIDATE_SCORE)
        continue

    X_query = model["tokenizer"].transform([row["Query"]])
    # cosine_similarity returns a (1, n_candidates) matrix; take its only row.
    sim = cosine_similarity(X_query, model["X_faq"][is_cand])[0]
    scores.append(sim.max())

# A row is predicted positive when its best candidate beats the threshold.
predict = (np.array(scores) > model["thr"]).astype(int)

df = pd.DataFrame(
    {
        "id": [f"testid{i:04}" for i in range(len(predict))],
        "pred": predict,
    }
)
df.to_csv("submission.csv", index=False)