from model1 import model1
from model2 import model2
import checkTool as ct
import extract_pdf as pf
import extraction_data as ed


def string_similarity(s1, s2):
    """Return a 0-100 similarity score between two strings.

    Both inputs have their spaces removed and are lower-cased before
    comparison. The score is derived from the Levenshtein edit distance:
    ``(1 - distance / max(len1, len2)) * 100``, rounded to one decimal place.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        ``100.0`` when the normalized strings are equal (including the case
        where both are empty), otherwise the rounded similarity percentage.
    """
    first = s1.replace(' ', '').lower()
    second = s2.replace(' ', '').lower()

    # Equal strings short-circuit here, which also guarantees that
    # max(len(first), len(second)) is non-zero in the division below.
    if first == second:
        return 100.0

    # Levenshtein distance, keeping only the previous row of the DP table:
    # each cell depends solely on the row above and the cell to its left.
    previous_row = list(range(len(second) + 1))
    for i, char_a in enumerate(first, start=1):
        current_row = [i]
        for j, char_b in enumerate(second, start=1):
            cost = 0 if char_a == char_b else 1
            current_row.append(min(
                previous_row[j] + 1,         # deletion
                current_row[j - 1] + 1,      # insertion
                previous_row[j - 1] + cost,  # substitution
            ))
        previous_row = current_row

    distance = previous_row[-1]
    similarity = (1 - distance / max(len(first), len(second))) * 100
    return round(similarity, 1)


def get_data(img1_path, img2_path):
    """Extract ID-card and bank-statement data and score the name match.

    Args:
        img1_path: Path to the HKID card image; it is passed to both
            ``model1`` and ``model2``.
        img2_path: Path to the bank statement, passed to
            ``ed.get_info_from_bank``.

    Returns:
        The mapping returned by ``ed.get_info_from_bank`` with these keys
        added: ``similarity_score``, ``name_on_id``, ``hkid``, ``validity``
        and ``issue_date``.
    """
    # get info from hkid card
    # Both models read the same ID image; their outputs are merged by
    # ct.combine_info.
    info1 = model1(img1_path)
    info2 = model2(img1_path)
    cinfo = ct.combine_info(info1, info2)

    # get info from bank
    data = ed.get_info_from_bank(img2_path)
    name_on_statement = data["nameStatement"]

    # Similarity check
    # cinfo is indexed positionally: 0 = name, 1 = HKID validity,
    # 2 = HKID number, 3 = date of issue.
    name_on_id = cinfo[0]
    # NOTE(review): earlier debug code treated a score >= 85 as "the same
    # person"; no threshold is applied here, callers decide on the cutoff.
    similarity_score = string_similarity(name_on_statement, name_on_id)

    data["similarity_score"] = similarity_score
    data["name_on_id"] = name_on_id
    data["hkid"] = cinfo[2]
    data["validity"] = cinfo[1]
    data["issue_date"] = cinfo[3]
    return data


# Example usage:
# path1 = 'IMG_4495.jpg'
# path2 = 'hangseng_page-0001.jpg'
# print(get_data(path1, path2))