Spaces:
Sleeping
Sleeping
Mitul Mohammad Abdullah Al Mukit
commited on
Commit
•
9312707
1
Parent(s):
ddf4d52
update
Browse files- .gitignore +9 -5
- README.md +24 -13
- __pycache__/check_hkid_validity.cpython-311.pyc +0 -0
- __pycache__/demo.cpython-311.pyc +0 -0
- __pycache__/extract_pdf.cpython-311.pyc +0 -0
- __pycache__/extraction_data.cpython-311.pyc +0 -0
- __pycache__/imageSegmentation.cpython-311.pyc +0 -0
- __pycache__/similarity_check.cpython-311.pyc +0 -0
- check_hkid_validity.py +76 -0
- demo.py +4 -0
- extract_pdf.py +5 -5
- extraction_data.py +23 -38
- image/DONT_DELETE.txt +0 -0
- imageSegmentation.py +19 -9
- model0.py +54 -0
- request_json/__pycache__/sbt_request_generator.cpython-311.pyc +0 -0
- request_json/sbt_request_generator.py +15 -37
- saved/DONT_DELETE.txt +0 -0
- sbt_request.txt +23 -11
- similarity_check.py +9 -3
- test.py +2 -1
- test_ocr.py +4 -0
- webapp.py +63 -26
.gitignore
CHANGED
@@ -1,5 +1,9 @@
|
|
1 |
-
image
|
2 |
-
saved
|
3 |
-
|
4 |
-
|
5 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
image/hkid.jpg
|
2 |
+
saved/HKID.jpg
|
3 |
+
.DS_Store
|
4 |
+
bank_statement/*
|
5 |
+
bank_statement
|
6 |
+
data1.txt
|
7 |
+
.env
|
8 |
+
test.py
|
9 |
+
dontTouchMe
|
README.md
CHANGED
@@ -1,13 +1,24 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Similarity_check
|
2 |
+
|
3 |
+
This application performs a similarity check on the user's name during registration.
|
4 |
+
|
5 |
+
## Installation
|
6 |
+
|
7 |
+
Use the package manager [pip](https://pip.pypa.io/en/stable/) to install packages.
|
8 |
+
```
|
9 |
+
pip install -r requirements.txt
|
10 |
+
```
|
11 |
+
|
12 |
+
## Usage
|
13 |
+
|
14 |
+
Run web UI
|
15 |
+
```
|
16 |
+
streamlit run webapp.py
|
17 |
+
```
|
18 |
+
|
19 |
+
## Uploading Files
|
20 |
+
|
21 |
+
The HKID and the bank statement must be uploaded in JPG/JPEG format to run the application.
|
22 |
+
|
23 |
+
## Connection to database
|
24 |
+
The code for connecting to the database works through API requests and can be found in the transaction_api repository.
|
__pycache__/check_hkid_validity.cpython-311.pyc
ADDED
Binary file (3.15 kB). View file
|
|
__pycache__/demo.cpython-311.pyc
CHANGED
Binary files a/__pycache__/demo.cpython-311.pyc and b/__pycache__/demo.cpython-311.pyc differ
|
|
__pycache__/extract_pdf.cpython-311.pyc
CHANGED
Binary files a/__pycache__/extract_pdf.cpython-311.pyc and b/__pycache__/extract_pdf.cpython-311.pyc differ
|
|
__pycache__/extraction_data.cpython-311.pyc
ADDED
Binary file (4.21 kB). View file
|
|
__pycache__/imageSegmentation.cpython-311.pyc
CHANGED
Binary files a/__pycache__/imageSegmentation.cpython-311.pyc and b/__pycache__/imageSegmentation.cpython-311.pyc differ
|
|
__pycache__/similarity_check.cpython-311.pyc
CHANGED
Binary files a/__pycache__/similarity_check.cpython-311.pyc and b/__pycache__/similarity_check.cpython-311.pyc differ
|
|
check_hkid_validity.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from cnocr import CnOcr
|
2 |
+
|
3 |
+
def string_similarity(s1, s2):
    """Return how similar two strings are, as a percentage from 0.0 to 100.0.

    Space characters are removed and both strings are lower-cased before
    comparison. The score is based on the Levenshtein edit distance,
    normalised by the length of the longer string and rounded to one
    decimal place. Identical strings (after normalisation) score 100.0.
    """
    a = s1.replace(' ', '').lower()
    b = s2.replace(' ', '').lower()

    if a == b:
        return 100.0

    rows, cols = len(a), len(b)

    # Only the previous row of the edit-distance table is needed to build
    # the next one; row 0 is the cost of inserting the first j chars of b.
    previous = list(range(cols + 1))
    for i, ch_a in enumerate(a, start=1):
        current = [i]  # cost of deleting the first i chars of a
        for j, ch_b in enumerate(b, start=1):
            current.append(min(
                previous[j] + 1,                     # deletion
                current[j - 1] + 1,                  # insertion
                previous[j - 1] + (ch_a != ch_b),    # substitution
            ))
        previous = current

    distance = previous[cols]
    similarity = (1 - distance / max(rows, cols)) * 100
    return round(similarity, 1)
|
35 |
+
|
36 |
+
def is_good_subsequence(s1, s2):
    """Return True when at least half of s2's characters occur, in order, in s1.

    The comparison is case-insensitive. Characters of s2 are matched
    greedily against s1 from left to right; a character of s2 that cannot
    be found in the remaining part of s1 is skipped and does not consume
    any of s1, so later characters can still match.

    Args:
        s1: The reference string that is searched.
        s2: The candidate string whose characters are looked up in s1.

    Returns:
        False if s2 is more than 10 characters longer than s1; otherwise
        whether the number of matched characters is at least 50% of the
        length of s2 (an empty s2 therefore yields True).
    """
    len_s2 = len(s2)
    len_s1 = len(s1)

    s1 = s1.lower()
    s2 = s2.lower()

    # A candidate far longer than the reference is rejected outright.
    if len_s2 > len_s1 + 10:
        return False

    match_count = 0
    search_from = 0  # index in s1 just past the last matched character

    for char in s2:
        found_at = s1.find(char, search_from)
        if found_at == -1:
            # Leave search_from untouched so one unmatched character does
            # not stop every following character from matching.
            continue
        match_count += 1
        search_from = found_at + 1

    # Accept when the match count reaches 50% of s2's length.
    return match_count >= (0.5 * len_s2)
|
62 |
+
|
63 |
+
def check_hkid(path):
    """Return True if OCR of the image at `path` finds the HKID card title.

    The image is read with CnOcr (English PP-OCRv3 recognition model) and
    each recognised text fragment is compared against the string
    'HONGKONGPERMANENTIDENTITYCARD' with string_similarity. The card is
    accepted as soon as one fragment scores above 60.
    """
    recognizer = CnOcr(rec_model_name='en_PP-OCRv3')
    detections = recognizer.ocr(path)

    return any(
        string_similarity('HONGKONGPERMANENTIDENTITYCARD', entry['text']) > 60
        for entry in detections
    )
|
75 |
+
|
76 |
+
# print(check_hkid('image/hkid.jpg'))
|
demo.py
CHANGED
@@ -13,6 +13,8 @@ import Visualization_utilities as vis
|
|
13 |
# Load a sample picture and learn how to recognize it.
|
14 |
|
15 |
def get_face_encoding(path):
|
|
|
|
|
16 |
HKID_cropped = imageSegmentation.auto_cropping(path)
|
17 |
cv2.imwrite('saved/HKID.jpg', HKID_cropped)
|
18 |
HKID_image = face_recognition.load_image_file("saved/HKID.jpg")
|
@@ -63,6 +65,8 @@ def process_frame(frame, process_this_frame, face_locations, faces, face_names,
|
|
63 |
|
64 |
hkid_face_encoding = get_face_encoding("image")
|
65 |
|
|
|
|
|
66 |
known_face_encodings = [
|
67 |
hkid_face_encoding
|
68 |
]
|
|
|
13 |
# Load a sample picture and learn how to recognize it.
|
14 |
|
15 |
def get_face_encoding(path):
|
16 |
+
print(f'path: {path}')
|
17 |
+
print('hello')
|
18 |
HKID_cropped = imageSegmentation.auto_cropping(path)
|
19 |
cv2.imwrite('saved/HKID.jpg', HKID_cropped)
|
20 |
HKID_image = face_recognition.load_image_file("saved/HKID.jpg")
|
|
|
65 |
|
66 |
hkid_face_encoding = get_face_encoding("image")
|
67 |
|
68 |
+
print(f'encoding: {hkid_face_encoding}')
|
69 |
+
|
70 |
known_face_encodings = [
|
71 |
hkid_face_encoding
|
72 |
]
|
extract_pdf.py
CHANGED
@@ -43,7 +43,7 @@ def get_info_from_bank(img_path, file_name):
|
|
43 |
out = ocr.ocr(img_path)
|
44 |
# Data
|
45 |
bank_data = {
|
46 |
-
"
|
47 |
"address": "",
|
48 |
"bank": check_bank_name(file_name),
|
49 |
"date": "",
|
@@ -67,8 +67,8 @@ def get_info_from_bank(img_path, file_name):
|
|
67 |
pass
|
68 |
elif ((positions[0][0] >= 147) and (positions[0][1] >= 265) and (positions[2][0] <= 400) and (positions[2][1] <= 295)):
|
69 |
if (raw_detected_text != ''): # name
|
70 |
-
bank_data["
|
71 |
-
bank_data["
|
72 |
elif ((positions[0][0] >= 113) and (positions[0][1] >= 291) and (positions[2][0] <= 500) and (positions[2][1] <= 381)):
|
73 |
if (raw_detected_text != ''): # position
|
74 |
bank_data["address"] += raw_detected_text
|
@@ -100,13 +100,13 @@ def get_info_from_bank(img_path, file_name):
|
|
100 |
|
101 |
|
102 |
# print('------------From bank statement------------')
|
103 |
-
# print(f'Name: {bank_data["
|
104 |
# print(f'Address: {bank_data["address"]}')
|
105 |
# print(f'Bank: {bank_data["bank"]}')
|
106 |
# print(f'Date: {bank_data["date"]}')
|
107 |
# print(f'Asset: {asset_equa} = {bank_data["asset"]}')
|
108 |
# print(f'Liabilities: {bank_data["liabilities"]}')
|
109 |
-
# post_data(bank_data["bank"], bank_data["
|
110 |
return bank_data
|
111 |
|
112 |
########## Posting data through API ############
|
|
|
43 |
out = ocr.ocr(img_path)
|
44 |
# Data
|
45 |
bank_data = {
|
46 |
+
"nameStatement": "",
|
47 |
"address": "",
|
48 |
"bank": check_bank_name(file_name),
|
49 |
"date": "",
|
|
|
67 |
pass
|
68 |
elif ((positions[0][0] >= 147) and (positions[0][1] >= 265) and (positions[2][0] <= 400) and (positions[2][1] <= 295)):
|
69 |
if (raw_detected_text != ''): # name
|
70 |
+
bank_data["nameStatement"] += raw_detected_text
|
71 |
+
bank_data["nameStatement"] = check_mr(bank_data["nameStatement"])
|
72 |
elif ((positions[0][0] >= 113) and (positions[0][1] >= 291) and (positions[2][0] <= 500) and (positions[2][1] <= 381)):
|
73 |
if (raw_detected_text != ''): # position
|
74 |
bank_data["address"] += raw_detected_text
|
|
|
100 |
|
101 |
|
102 |
# print('------------From bank statement------------')
|
103 |
+
# print(f'Name: {bank_data["nameStatement"]}')
|
104 |
# print(f'Address: {bank_data["address"]}')
|
105 |
# print(f'Bank: {bank_data["bank"]}')
|
106 |
# print(f'Date: {bank_data["date"]}')
|
107 |
# print(f'Asset: {asset_equa} = {bank_data["asset"]}')
|
108 |
# print(f'Liabilities: {bank_data["liabilities"]}')
|
109 |
+
# post_data(bank_data["bank"], bank_data["nameStatement"], bank_data["address"], bank_data["asset"], bank_data["liabilities"], bank_data["date"])
|
110 |
return bank_data
|
111 |
|
112 |
########## Posting data through API ############
|
extraction_data.py
CHANGED
@@ -2,6 +2,9 @@
|
|
2 |
from cnocr import CnOcr
|
3 |
from pdfquery import PDFQuery
|
4 |
import openai
|
|
|
|
|
|
|
5 |
|
6 |
def validate(text):
|
7 |
invalid_list = [' ',',']
|
@@ -39,58 +42,40 @@ def check_mr(text):
|
|
39 |
else:
|
40 |
return text
|
41 |
|
42 |
-
def get_info_from_bank(img_path
|
43 |
# Running the model
|
44 |
ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
|
45 |
out = ocr.ocr(img_path)
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
"name_on_bs": "",
|
50 |
-
"address": "",
|
51 |
-
"bank": "",
|
52 |
-
"date": "",
|
53 |
-
"asset": 0.0,
|
54 |
-
"liabilities": ""
|
55 |
-
}
|
56 |
-
|
57 |
-
# {
|
58 |
-
# "Customer Name": "MR CHIU CHUNG YIN",
|
59 |
-
# "Address": "FLAT 13,8/F,OILOK HOUSE, YAU OI ESTATE, TUEN MUN NT",
|
60 |
-
# "Bank Name": "HSBC",
|
61 |
-
# "Statement Issue Date": "10 January 2023",
|
62 |
-
# "Total Asset": "7,265.80",
|
63 |
-
# "Total Liability": "7,265.80"
|
64 |
-
# }
|
65 |
-
|
66 |
-
openai.api_key = "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"
|
67 |
invalid_list = [' ',',']
|
68 |
data_set_1 = []
|
69 |
|
70 |
-
pdf = PDFQuery(pdf_path)
|
71 |
-
pdf.load(0)
|
72 |
-
text_elements = pdf.pq('LTTextLineHorizontal').text()
|
73 |
-
text_elements = text_elements.replace("cid:", "")
|
74 |
-
|
75 |
for item in out:
|
76 |
if item['text'] not in invalid_list:
|
77 |
data_set_1.append(item['text'])
|
78 |
|
79 |
completion = openai.ChatCompletion.create(
|
80 |
model = "gpt-3.5-turbo",
|
81 |
-
temperature = 0
|
82 |
messages = [
|
83 |
-
{"role": "system", "content": "You are an AI assistant for extracting data from bank statements. Uppercase and lowercase letters are the same.
|
84 |
-
{"role": "user", "content": f"Extract data from the following 2 sets of text: {data_set_1}
|
85 |
-
# {"role": "assistant", "content": "Q: How do you make 7 even? A: Take away the s."},
|
86 |
-
# {"role": "user", "content": "Write one related to programmers."}
|
87 |
]
|
88 |
)
|
89 |
-
bs_data = completion['choices'][0]['message']['content']
|
90 |
-
print(bs_data)
|
91 |
-
return bs_data
|
92 |
|
93 |
-
#
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from cnocr import CnOcr
|
3 |
from pdfquery import PDFQuery
|
4 |
import openai
|
5 |
+
import json
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import os
|
8 |
|
9 |
def validate(text):
|
10 |
invalid_list = [' ',',']
|
|
|
42 |
else:
|
43 |
return text
|
44 |
|
45 |
+
def get_info_from_bank(img_path):
    """Extract bank-statement fields from an image using OCR and a chat model.

    The image at `img_path` is read with CnOcr, the recognised text
    fragments are sent to the OpenAI chat-completion API, and the reply is
    parsed as JSON. The "nameStatement" entry of the parsed result is
    passed through check_mr before the result is returned.

    Raises whatever json.loads raises when the model's reply is not valid
    JSON, and KeyError when the reply has no "nameStatement" entry.
    """
    # Running the model
    recognizer = CnOcr(rec_model_name='densenet_lite_136-gru')
    ocr_items = recognizer.ocr(img_path)

    # The API key comes from the environment, which load_dotenv() may
    # populate from a local .env file.
    load_dotenv()
    openai.api_key = os.environ.get("data-extraction-api")

    # Drop fragments that are only a space or a comma.
    invalid_list = [' ', ',']
    data_set_1 = [item['text'] for item in ocr_items if item['text'] not in invalid_list]

    system_message = {
        "role": "system",
        "content": "You are an AI assistant for extracting data with following names(bank, nameStatement, address, totalAsset (only HKD and represent as one number), totalLiability, statementDate) from bank statements. Uppercase and lowercase letters are the same. Store the results in dictionary format",
    }
    user_message = {
        "role": "user",
        "content": f"Extract data from the following 2 sets of text: {data_set_1}. (1.) Data that locate in the front part of the text: customer full name (it should be a Chinese name in English spelling and two to three words), address in Hong Kong (including flat, floor, court/estate, region in Hong Kong), bank name, bank statement issue date (verly likely to be within 1-2 years), (2.) Data that mainly locate in the other part of the text: total asset (including investments and deposits) and total liability (often contains DR and includes credit card but might be zero) of the current month.",
    }

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[system_message, user_message],
    )

    reply = completion['choices'][0]['message']['content']
    bs_data = json.loads(reply)
    bs_data["nameStatement"] = check_mr(bs_data["nameStatement"])
    return bs_data
|
image/DONT_DELETE.txt
ADDED
File without changes
|
imageSegmentation.py
CHANGED
@@ -22,34 +22,42 @@ detector = vision.FaceDetector.create_from_options(options)
|
|
22 |
def crop(
|
23 |
image,
|
24 |
detection_result
|
25 |
-
)
|
26 |
-
annotated_image = image.copy()
|
27 |
-
height, width, _ = image.shape
|
28 |
|
|
|
29 |
# Here assume we only detect one face
|
30 |
for detection in detection_result.detections:
|
31 |
# Crop detected face
|
32 |
bbox = detection.bounding_box
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
36 |
|
37 |
def auto_cropping(dir):
|
38 |
|
39 |
files = os.listdir(dir) # list of files in directory
|
40 |
|
|
|
41 |
for file in files:
|
42 |
-
|
|
|
43 |
file_dir = Path(dir + "/" + file)
|
44 |
abs_path = file_dir.resolve()
|
45 |
|
46 |
img = mp.Image.create_from_file(str(abs_path))
|
47 |
|
48 |
detection_result = detector.detect(img)
|
49 |
-
save_path = 'saved'
|
50 |
|
51 |
image_copy = np.copy(img.numpy_view())
|
52 |
annotated_image = crop(image_copy, detection_result)
|
|
|
|
|
|
|
|
|
53 |
rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
|
54 |
|
55 |
return rgb_annotated_image
|
@@ -57,4 +65,6 @@ def auto_cropping(dir):
|
|
57 |
# auto_cropping("image") # <----------- !!!!change address here!!!! ------------------> #
|
58 |
|
59 |
# The current problem (6/2/2023) is that the model may recognize some cartoon face as human face,
|
60 |
-
# my idea is to use another model to classify if the cropped image is real human face
|
|
|
|
|
|
22 |
def crop(
    image,
    detection_result
):
    """Crop the first detected face out of `image`.

    Args:
        image: Array indexed as image[row, column, ...].
        detection_result: Object with a `detections` sequence; each
            detection exposes `bounding_box` with integer `origin_x`,
            `origin_y`, `width` and `height` attributes.

    Returns:
        The sub-array covering the first detection's bounding box (a view
        into `image`, as produced by slicing), or None when there are no
        detections.
    """
    # Here assume we only detect one face
    for detection in detection_result.detections:
        # Crop detected face
        bbox = detection.bounding_box

        # Clamp to the image origin: a negative start index would be read
        # by the slice as "count from the end" and give a wrong or empty
        # crop. NOTE(review): assumes boxes may extend past the image
        # edge — confirm against the detector's output.
        top = max(bbox.origin_y, 0)
        left = max(bbox.origin_x, 0)
        bottom = max(bbox.origin_y + bbox.height, 0)
        right = max(bbox.origin_x + bbox.width, 0)

        return image[top:bottom, left:right]

    # No face was detected.
    return None
|
39 |
|
40 |
def auto_cropping(dir):
|
41 |
|
42 |
files = os.listdir(dir) # list of files in directory
|
43 |
|
44 |
+
print(files)
|
45 |
for file in files:
|
46 |
+
if file == "DONT_DELETE.txt":
|
47 |
+
continue
|
48 |
file_dir = Path(dir + "/" + file)
|
49 |
abs_path = file_dir.resolve()
|
50 |
|
51 |
img = mp.Image.create_from_file(str(abs_path))
|
52 |
|
53 |
detection_result = detector.detect(img)
|
|
|
54 |
|
55 |
image_copy = np.copy(img.numpy_view())
|
56 |
annotated_image = crop(image_copy, detection_result)
|
57 |
+
|
58 |
+
print('hello')
|
59 |
+
print(annotated_image)
|
60 |
+
|
61 |
rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
|
62 |
|
63 |
return rgb_annotated_image
|
|
|
65 |
# auto_cropping("image") # <----------- !!!!change address here!!!! ------------------> #
|
66 |
|
67 |
# The current problem (6/2/2023) is that the model may recognize some cartoon face as human face,
|
68 |
+
# my idea is to use another model to classify if the cropped image is real human face
|
69 |
+
|
70 |
+
# print(auto_cropping("image"))
|
model0.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from cnocr import CnOcr
|
2 |
+
import openai
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import os
|
5 |
+
import json
|
6 |
+
|
7 |
+
def model0(path):
|
8 |
+
ocr = CnOcr(rec_model_name='en_PP-OCRv3')
|
9 |
+
out = ocr.ocr(path)
|
10 |
+
|
11 |
+
print(out)
|
12 |
+
|
13 |
+
load_dotenv()
|
14 |
+
openai.api_key = os.environ.get("data-extraction-api")
|
15 |
+
|
16 |
+
invalid_list = [' ',',']
|
17 |
+
data_set_1 = []
|
18 |
+
for item in out:
|
19 |
+
if item['text'] not in invalid_list:
|
20 |
+
data_set_1.append(item['text'])
|
21 |
+
|
22 |
+
completion = openai.ChatCompletion.create(
|
23 |
+
model = "gpt-3.5-turbo",
|
24 |
+
temperature = 0,
|
25 |
+
messages = [
|
26 |
+
{"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
|
27 |
+
(name, HKID number, date of issue) from HKID card. Uppercase and lowercase letters are the same. Store the results in \
|
28 |
+
dictionary format"},
|
29 |
+
{"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
|
30 |
+
You have three types of data to extract. \
|
31 |
+
1. id card holder full name (it noramlly is a chinese name, including surname and family \
|
32 |
+
name in English spelling, and it may be separate in different fields in the data set for surname and family name \
|
33 |
+
sometimes) \
|
34 |
+
2. issue date (should be a date with month and day, e.g. 19-97 is the required format, but 26-11-18 is not \
|
35 |
+
because date of issue of have 5 characters) Only choose valid format!!! \
|
36 |
+
3. HKID number (The standard format of HKID number is @123456(#) e.g. A123456(7) is a valid HKID number. \
|
37 |
+
(a) @ represents any one or two capital letters of the alphabet. \
|
38 |
+
(b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
|
39 |
+
Remember to include the check digit with () \
|
40 |
+
Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
|
41 |
+
]
|
42 |
+
)
|
43 |
+
|
44 |
+
data = completion['choices'][0]['message']['content']
|
45 |
+
|
46 |
+
print(data)
|
47 |
+
|
48 |
+
id_data = json.loads(data)
|
49 |
+
|
50 |
+
print(id_data)
|
51 |
+
return
|
52 |
+
# return [name, valid_hkid, hkid, issuedate]
|
53 |
+
|
54 |
+
model0('dontTouchMe/IMG_4499.jpg')
|
request_json/__pycache__/sbt_request_generator.cpython-311.pyc
CHANGED
Binary files a/request_json/__pycache__/sbt_request_generator.cpython-311.pyc and b/request_json/__pycache__/sbt_request_generator.cpython-311.pyc differ
|
|
request_json/sbt_request_generator.py
CHANGED
@@ -51,9 +51,8 @@ def generate_request(data):
|
|
51 |
|
52 |
|
53 |
def split_data(data):
|
54 |
-
request_id = "request1234"
|
55 |
-
# token_id =
|
56 |
-
token_id = "12344321"
|
57 |
|
58 |
f = open('data1.txt', 'r')
|
59 |
with open('data1.txt') as f:
|
@@ -62,47 +61,26 @@ def split_data(data):
|
|
62 |
|
63 |
if "avg_score" not in data.keys():
|
64 |
data["avg_score"] = "0"
|
|
|
|
|
65 |
|
66 |
-
|
67 |
"endpoint": "SBT",
|
68 |
-
"apiType": "
|
69 |
-
"requestId": "
|
70 |
"date": get_today_date(), # a string
|
71 |
-
"tokenID": token_id,# a string
|
72 |
"docType": "HKID",
|
73 |
"nameDoc": data["name_on_id"], # a string; lower case with space separate; e.g. san chi nan
|
74 |
"docID": data["hkid"], # a string; with bracket (); e.g. G908833(1)
|
75 |
"docValidity": data["validity"], # a string; "True" or "False"
|
76 |
"dateOfIssue": data["issue_date"], # a string; month-year; e.g. 07-81
|
77 |
-
"matchingScore": str(data["
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
}
|
79 |
|
80 |
-
|
81 |
-
"endpoint": "SBT",
|
82 |
-
"apiType": "store_statement_verif",
|
83 |
-
"requestId": "request_id_bs",
|
84 |
-
"date": get_today_date(), # a string
|
85 |
-
"tokenID": token_id, # a string
|
86 |
-
"bank":data["bank"], #
|
87 |
-
"nameStatement":data["name_on_bs"], #
|
88 |
-
"address":data["address"], #
|
89 |
-
"asset": str(data["asset"]), # a string containing only numbers
|
90 |
-
"liability": data["liabilities"], # a string containing only numbers
|
91 |
-
"statementDate": data["date"], # a string
|
92 |
-
}
|
93 |
-
|
94 |
-
generate_request(legal_doc_data)
|
95 |
-
generate_request(bank_statement_data)
|
96 |
-
|
97 |
-
|
98 |
-
# demo structure of the data
|
99 |
-
# {"password2": "chingfuilau", "username": "Allenlau1111", "password1": "Allen02118173", "date": "2023-03-03 00:00:00",
|
100 |
-
# "credentialId": "testing123","requestID": "test_statements",
|
101 |
-
# "userId": "7893456",
|
102 |
-
# "endpoint": "SBT",
|
103 |
-
# "apiType": "metadata",
|
104 |
-
# 'tokenId':"500",
|
105 |
-
# "ipfsLink1": ".",
|
106 |
-
# "ipfsLink2": "..",
|
107 |
-
# "ipfsLink3": "...",
|
108 |
-
# "membershipStatus": "1"}
|
|
|
51 |
|
52 |
|
53 |
def split_data(data):
|
54 |
+
# request_id = "request1234"
|
55 |
+
# token_id = "12344321"
|
|
|
56 |
|
57 |
f = open('data1.txt', 'r')
|
58 |
with open('data1.txt') as f:
|
|
|
61 |
|
62 |
if "avg_score" not in data.keys():
|
63 |
data["avg_score"] = "0"
|
64 |
+
elif "similarity_score" not in data.keys():
|
65 |
+
data["similarity_score"] = "0"
|
66 |
|
67 |
+
sbt_data = {
|
68 |
"endpoint": "SBT",
|
69 |
+
"apiType": "store_img_verif",
|
70 |
+
"requestId": "request_id_1234",
|
71 |
"date": get_today_date(), # a string
|
|
|
72 |
"docType": "HKID",
|
73 |
"nameDoc": data["name_on_id"], # a string; lower case with space separate; e.g. san chi nan
|
74 |
"docID": data["hkid"], # a string; with bracket (); e.g. G908833(1)
|
75 |
"docValidity": data["validity"], # a string; "True" or "False"
|
76 |
"dateOfIssue": data["issue_date"], # a string; month-year; e.g. 07-81
|
77 |
+
"matchingScore": str(data["similarity_score"]), # a string; e.g. "0.957"
|
78 |
+
"bank":str(data["bank"]), #
|
79 |
+
"nameStatement":str(data["nameStatement"]), #
|
80 |
+
"address":str(data["address"]), #
|
81 |
+
"asset": str(data["totalAsset"]), # a string containing only numbers
|
82 |
+
"liability": str(data["totalLiability"]), # a string containing only numbers
|
83 |
+
"statementDate": str(data["statementDate"]), # a string
|
84 |
}
|
85 |
|
86 |
+
generate_request(sbt_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
saved/DONT_DELETE.txt
ADDED
File without changes
|
sbt_request.txt
CHANGED
@@ -1,22 +1,34 @@
|
|
1 |
-
endpoint: b'
|
2 |
|
3 |
-
apiType: b'
|
4 |
|
5 |
-
requestId: b'
|
6 |
|
7 |
-
|
8 |
|
9 |
-
|
10 |
|
11 |
-
|
12 |
|
13 |
-
|
14 |
|
15 |
-
|
16 |
|
17 |
-
|
18 |
|
19 |
-
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
|
|
1 |
+
endpoint: b'NbBxyA/6/7XAzpk8HYqkN6D7k7yD8/c2e53wyblTHcVg0D1EgYnZodoTuvDpEMCkouCyk80xAhIynxYa0lsRfLsI4438OLHuad/gvZt29v1cYqxVYqs+udA/Hd2zbsr4zYKORBW+LJhj9u+8zefNEGCstMK215eeht0ZpxLlgQMQdApSkPsPJnYO3VLWmBiV7txwZGhkqtOvE+cfsUEsulSpdhpRvcj5IGMNrF/14gLKHWjxTKj45At2w1ToBzIwfPDPrF9QA4mw6kUs25UFoSOFOo5/OMbW2iSDpYFE4UHj74Z75hzYJWdWlDTKLZUDWxwOF19CGSbKNxZt5Fsr3W2k17W5D5CG0skkEl4lhCyepoyGNWJZCNJ65PRFcUYyT2JqWRQM7htIKgbs1lZlHl85go3Z9ULNf3CgJeIZGwo61a2jwDPB7lr9LmpVk6tz5HLUNaKpyGZaebiB2J/Fk7MyrYd/r5AB8qLgZXgp61YRFv7bFpZAKIRnMTZP1FH3ZLR8CE9sISWxsx5WipGmSI3FmTASg9Xf7PBdyx3+GChiP7bxmBCDhQ/BqOn45ULOIyHURQ+im4O9B8MEx0XlWhilK0Fz0LtXnT0PGG2GdTE/B9kaEd6g6u8oaT4+3WeyZTN3Jtae0yFQ9iKnwPoR45LtXejfvQgaqAjauUnFDBs='
|
2 |
|
3 |
+
apiType: b'fytDJK29mmABmyNae11+Sn21WO41siR45mS8s5fH0KrHcgXJPIobqC1LuhV640z0AmdKvr5m4uLjCQYG2vsKLPPT83S872E0R0W2/5pfwoTgn0ezMnhHe9Gcfc3dV9pHcg1JgxQ2+/RJcajzT79X8QkQnGMVUkYj+8vy0RE09to7kGkzjsnnzYgHkewsnt2IKLSsrMgVbQte7QdVSSguq/z3C6EinEKkwN9GjrOxeL3vFGgf2Qwpx62kLOkFVPIqqzAG+/HU3psAMHXUf9Rea35wRPLDzZh7REAgZChTQ03qwGYwYMqEBrUjh92/4bZkZYYQmE4vc14gZCEFMnZgfMpa7r2kpsoiwwb01BF1WErI50pfLm/eeBw1G7OpNmVfNi04/7kc1Xqvf1Co9iNXNm9sSnwk9Vs8F7LC+bFe2ZKaAcQ7WBqPJefirJzUvz/0J8dqmfwm1m3/jIpQ5sRg6lrxd1omo5jeAspyx1DLTFqu61/o+H/30CeakVCkcbmsh53s8DOpRml7MIGKJQa3+u1tKo3VaCuiuoeDaQgX6M9sJaFw4AIAAl01Va7feqi11P3xFVt8y7dQJmfTwCBQxmFQwFAU2g0Ts91yUkSnD0AKMn7S0watnKtcJnd+33yomMm2AFhcljrlp/U+bztBueVShU8VZyd8LcxWAcPaszA='
|
4 |
|
5 |
+
requestId: b'HngLFH+4oanRITJPMA1Ub9tV/fbI9i17SY27qmqoxaYYtK63S9OhUg+OOIMNBQMGsjJ6pHfWfAB89xFDJ0QM3fUvhd1BHiTpyGnL5mqwoqTPouTyRARyAX+6LgEwe/vOX/LJGE1GbGjzV6RNKNn2cHm3Va6zGMMvensZkuW3cs+GbYuv5+22pk1Sot4jDl6uX0/CayqDchU9XpMo5BgbXDth0Xve2LtfKMQrKtWQ3926Dr3G219F2T3xIaQCWcP6lo0scK2Uk4RJWdHDl9Znvx6pjh6NY+ygHzUvbdBnrjTII6OOgBTPIslmhiKr4u3QHwv/zqzoQcMrncJ8IlLvJ5RhcwK1gtty+r7ank88TDyhm8pfv3mD31ei503410OMuDveiHLUwkjE3GvLhcbQ3Ukxg8xP5S/DsvWYNkq4ZanLJFS9pkSaEhpz1SnY9idXxVy3mnaNfg/npGk/yUHUx9KZ/i8DkP1RtjnDWFen/pD8Op4/KIx4pMm/+wHsCRRSMmpoqM/SaeqKXjota728eO9fg+vuADHL0nIpB4bQk/mSR88MrAwKnYye4arhH85v5FvaBwttpsuHAopB57+UXvs6e8nD7WjRuWY/S3DU8cFZqw5SCz9GbjmXD2Kj5AA3HXrfwEPrbPKA39bqjg82gd+aCtlfmNAffxM53svwBMI='
|
6 |
|
7 |
+
userId: b'CeUitfymDZUWw3IHsufjBUMeNOovvQ8EA5fKgQ7TfHuYaiuLl2OMUVuTc/BaaCY1JFgCo2ek2gWFBFJ4fzRyYFR8+eYg9GEYIjuP9s1/PG+iKsYJKgPR1iCZPPLtcEi39Ch7J7c9swq3ce67v8omSPqLRA6mUGlNrDc1qJavgTJBqsqLIMDjFEU9AgrUiyoR3VH0HRggavF2t/lhy7i96K97jpTKaE1NCTUc3lrFQkFpg2iLVdQCb5FukAdZY7UsT5KgU/vjZOUdufj6VpAQQe38zatCdnYxgu5q/0UlxZdhGhg4DIktVzVFv350cpKrOaIsyoLNAynEhQRyPXMp01Oa2Ol0BfdQnsBI35s/mY3EgzcxGnGIBLe0uzXcy++/X8aj5d44Ct6sZwCfKe093fWrkT2JnoFiZJ/fzTAWAuQKNUF1V9oj7n7VT7FDc+K5STiPS5fwRKk9LazpB0CxY17I5yFEGTTG20iXzrlqRCXGJIvi3rbDqk3F1oHU15UqTiw6Xq9/d4CsDBQb6RBl1VE0T19EyCLL7SSyvV1N9onYQSGCAGeuQvwPijgDzqfYufHZZ5ET0dAWu5BzGIvEwQqvtrmsDBa+QHeJj3IS+UKQYozsNhRH1UI4mNpd+2xOSklbr82QKYAgqsf3x9/w6kmn9UBCfk+ERSQ2pNIeOC8='
|
8 |
|
9 |
+
date: b'XBZNHz0m2aqUlKkGKobCyEdg8wPcmGYPwFNAvKH+5CXYmjKAd0XsLliR7bDCa+9cHy1USvqJNfQctXQXogUvIqQUvzMYhThfF+MXZx8ZJbMquXwDGslooGDw477K5taMc4vBdjZ044CWkJoLn77hZgUFtFovHxLonNR9YUWYh+eu4k42hhg2pbTJwg5iCWohLS95VnZYAnjcn5zIlLPRdFhl/9o0Tl8vdH2QVRk33+KwK1fDXWJvbG45jD84+rFPEHJh5B8cEAgFRJv0wsB6rbl+WDo51Q1mcKvDj/ZB6nGswUA3LRl1rtE+/Kgl79qkQuUI0WMowEhm7zTVGpsOnQ+KMNSs0PwKeZLX4PSvmuGOAjUODdu6bmpRLY5EK2aAC/fqy86jO1q/0dAeEYuXzjJ21EEuDWUPBc6yVH/ANhq52bR+rD72JrMx5v/3A5xvmdzXqwIBkEJjmrz2YWIt0Fvbu83k9DiC1JcbgIGOgSimwQEtArgUU2Kv6drCZc9IVGM6FrIOkJaDgykwAMHVC432OzS0QIMYzFO3X3AAXN3WHO1iE2W0Ymbq9NX5w1EvlKF8v0LJ+FFwBTujr40eiajeBRhumg5mD77tWwnOoubSas2W2t9CgxnFc4poe3AALqkFbpKYscT401k8oLEQHE83sdhgzMmZkYX8PAuKx2E='
|
10 |
|
11 |
+
docType: b'ahk6WI86pWF6VtFolNg9m3+L7zMQO0cigoUbglHsOl6OpqY8/GlR7neLgFpo5N0DcVXS3icYJ6mNNpFn1Nt0I5V/L6vx8e4WOymskAxgXd+LjYTNKaXcnef1cZDR9P040cicPWNahhZ7ZrgNk9CkWWpa3GYGggLuOM9ZKOXbuQlDwFl8Hv1RH0erg15DuYUC7M6ugSQfRK1bK84iq2ZDDnNJNMQNQlzZ1KzzmXJIK4EWa/JJMwwkK9h4XwZIBExtNVvMAPtyrMIUuHtxay+V3+Qs7g+B4E5maZQBFCqkPbqaU5TmN7HChC95cQGU0mG7LIZ+UH+WbSa3lCijSEHkQ5VbDdPebXib7tA0uKkaICs5CA600p6WFTccj3wZvPpRF4mrMOwz+GgcqBKWJLSy+bjqmqRyKcFApWE4DRtsaL3aJ4uW5wDPbiLWX210EjHanff2FcS+Ab3AA2HM+BZaTsynAmJZ6QXfbtswn4xZr2GmzdYjw1yTFkUCF4NuA+vaaVk8GZHctMSY0PY0vXI7Tk2ZkV2YCClJ9+y8W5nrBQTlUmYuFR4c0SQOGJS7g88SXeEju/hKWo3eqbhZ+DFdizBrxrWn+Ysi04QQJElv9oHXw14Kq5S9DBkwR4AV0KH21gJqmyxSYWEPX+/ejrY5pM9MB+Kaf0P/0uqp0Eb+3jE='
|
12 |
|
13 |
+
nameDoc: b'eKA52KyqncSq/E3gOyL9mirv8h6vRGqRiiqXpD0fcF8g2KNwHKSh0z1VsXluWeIOpKMmPpsQv5+zoOCGt7CwIc0ZtkBauu3TYOyzOd6uc2v9K43Dj1adK+UrI46Sq2IQMwXvzdmJ4vBI8DXEFiIT7dzK+bCAKVBY0EAC37T7wEczsiRkiUta/dNWl+bxNmcO7hrUi0rj4M+sVKFhf3p+75jXiNi/QwomJWD2OQ84oh/n4rHl7D0OwdZ5K3KCaHPr30vUtC5JLFFKnEwbTxznXqfBes649RHyf4vxKf/gF4Z8yZwAKUUJ5Ez9hEKzSvG3htIOHDGD+E/LyiFoqJgXsh38XRjl7cQNPn+f9IAcsNQbDVtxQ6qdufRbnoMph3K/OyCckntUruf+lEkiBh7uRWGyHdIFHrBk7LPYtNMg4msLCsrpt1/IRnd7A07MWHRiiJ6K5I+cCOYdc5MHTdfxGiPdHcUkctKTkmmpJhCRsJbRksl+dkows60CaemHDW1aH/aygnhdmO9E9MfxdDqfleroV81zeUl+BtCjXVuFISIcXnOr+hJttV+k/9jvRJZiTfMR2PI8PQKBw0I8uytrI3kMMjGzIVr9uhcXtx0H+VP2hJQkuaLOJfgI1bxLUM4CLgHLxpPxYjr00sbe9LJroVBYgfKmwcyaiD3wu6azliQ='
|
14 |
|
15 |
+
docID: b'R3UEdGUG+omUgvioLJKdaJHYWVO4430ahCViGppAmh28g1S8emz5D12SmTSTiHansmeg1VQzC3AD+J7IcbZKLhtvxQ0fWCBfC6OctTp72IU7FbOZNnrFv8CdwcBFKhnqhV8YUWRFBKwn0GIPl6JeXAwptw6xn/DNQOIUdollyTSEyw5LyKKS5DFVRnYPKakSCm61oaSWech8C/0H8sqVPUHEqq98NIQWuU8fY1XprECBXpfszaADrPDpb34EoeOQnybGYNMiloY5quCUBqExeyOD5/seJWmh0fZuwp2p59YO693fPJOfIskIoWTHVVXev3+0UuFoXuBCQQFUdZ3rrR1aFG5ypD9LBx8MflnL7/W6tT0IiUWWpNk2sEvT7GZcecpt5biUZ6VoL+tINr3cOiRneri5lahh+nnUoAksEBtqRW4u096HsJHJ1SLyUEFV/no520ZjC0VtDN+ahakRn5VJZlKY66/Lwt6bKhXOkxwGLTYVvJLmSw5YQnAEryvHUeHYPyr18fFOZO0hcrYyADQi7Au2NFXlMZ3MTgMQKA5q6DFAGu6OjO75YpK0vJX+1cJgyOeaFe+Mih+0efw7M5LVrmEP+UXu4X3YZQrxZyYfD2ljLZrkeo3mASPP8QmWQGwoyE++BfZgVQo3ePZYg8R+I5zXMq5p/ED80z1Hq0U='
|
16 |
|
17 |
+
docValidity: b'OO6lVIP3D6UJflecoJm+OzNNE8PJW1xhqcDXqCL3SHOYtEYNGQvpZWa6Hr3TgGtLFuXSyPbokv38Pndb+QBv/49AXWZ1Szhj2QajUS6hIq6UImSNAL8p+z5ljiqRGc1G6jzLIyGabgBf3leY6oszJy5GfEwKGQdjKCDd+pZZUVEkHhOHAtpycmpHH7I3W+bkqnB75NU4PmNHK9PgmckY1xNsZrMAMB4MSr2Bix8rlPxkICitCP9WDYV/VXKwuDCXHG0mCCVUi1vvPcLhg1IZNO5//79bJd0FGlrZe13waj4fWvobjSNO9oV/D9kFsensaXDJUduR/ohdIYWQVL5Sx//oK/pcrJa/cwyl/K6UUtAymR88ysleqoCKgcKn/5C7ZxKn36ANKhu72l0Sopbtar7GZu0IxxjmNdkT93K4y+s8xdkzfpelZSM+vmeUZht7KAQYW+iQiyXjT4d6VNju7WtR6j1IfGtXwsp3tc+i5Bu7Kl7026FLqn6iKkC6fF0TFBWR09SHFS1dru4aSiI6qErxw4Y5rxU9FQT6dmVLMO3gXZEg2FiBVBz8T10MoTG2Gu0LvXjmbeC/EzM0bNm62J3N3hwa77W3+RWJBG1FPDmqee7fgNr1gzOAKiTa5Pn9qVctJuqBkBI0wfWVM8uguLK8ibkf/r/zNznpoyFXMYQ='
|
18 |
|
19 |
+
dateOfIssue: b'Rr7anhZoXVS8vmMss8WxyJnugeVgp7HqjrhMxO3raAF3tFSGKcn9cEzcTjSlB2l2LbSyJQCCF+5fqSK5uFLQfn6kcAhwL4zuPhzmxfZBhX6PSPvB3hd5bjWAT8N/mRbrCP/3eWReMgFw/UXYbf/DooD609EgBSyzMpGm81SvYVsQIqDLZNcEEsZEj1+HtfIFS6sTYPllgSn3Za4TgBvgxwEJm8G7b5QL5WCDRetRB4U+OndGl6byvUAFD7tU9n1DE8RxLV+hZN7sLwvkpqXP+6Bm7k4b2kI5gcL2Wyxg0ryCoa00u92aPrDnBH6D4hvFtbJEI9iqFviL2+K7qvgLN1B8S81A9KILrHctfcvn5prVI9pUmu4tpS/aW3QWFlgKxnAX0o0ytDijzLOm8fmiWctID/3ONjFMnEuaqqXdpUv4JaDW/XY/LTUIq4/BYwjaV4mFSY9Qa/FmOIyr2sRHlIJIa20W7MfZKTTQqW+lt0Q2EtsNoi5r4MfXHwy/qx6Huy5Mlyu8A2oLJNkNKIoXcmr+sSzqeHZHmiipqCC0ln/1rm4ZTx1wCYP8EemMyJn7rjad8Id7rMmsxSottVmm3LTzA7oS3IRQaEP/zkqZzSBlVXi44HuZW3EAEAVC6gtklV2avAMsbwBzg8ek1NDzfEG3Dv+KSniSGA0tuJrRVOs='
|
20 |
|
21 |
+
matchingScore: b'Qx3QwN0J3uFFHiS9XIp6P2zmvDTbNv2hXB1J/6fJDDNHBG2VCjhGiaYOQgkifMgu/Q5iBO19WWy0GAEekcaD9dGeacVk0dJRc5GvN4+RHJ25ESJeApuwGokHgweoEAJhydgjqPnnPxQkKTOUlETXdvfgF9QDvzhyhffbzU7rDzrzjhrYc+C1Zf+dsQfGjEboB8eLR9+IgL0lHDWt+BBk8yEk8tRA0XLFS+UNlha2GOajOVO8BcuxsvGg1wEbEYSJcQJXWza9WJ1Wk/wjzAQoGN2uNdE+A7mU+aRrpx4hcTvIIHfz4E0NyMUxLiLv0K+wtmMdjC7xLJD0T2O7VuvgBYgB4Y5wjWNtnD2IRilPMrV4/Dssyfvtl0v2maW9+nr4seXq6b8kGl4Rq42IVYQzZvqqWp1ghERQeRfgTZSPGe5l8aJ5U5Ejf9a9CzATrz018XHPpjevZyNTjmRo6b2+2YNVCTnCkMkrzkUJUowX7Qb+zbYG+30EvL5AJ/n9tjxHC7LwnaLftpMz/ppq0q+SFDEf411pe8I3lrEWoSCguReE5hsZlY0C/TDvN1khrABc+cttMFVnr1UDc3tQcF4Kgp9pRUZuyfUWiApOE+Km72iH2cNgYm/jtJq6kB39Ut3njBJUTn4hAylJ9m58PnQsWB40ZFDdaHJ++LA+YBLfeWQ='
|
22 |
+
|
23 |
+
bank: b'VoePsCXu7dmzZuwI0JCUGQXXDPOTS4i6g/ZYpp727Gx40wezdusISq59595JlIIQJ2FZCS5NVe3Gf946KiCxMupP+Iz1BsJTCjE2Ty5yjHSiUgwETBIriEEhGJaUHCxcwHUC5329weETUVSzS4tDXSTQlAALQ+6a7qsjrHZqSBUkb4W46LveMEavSgHgHGDvRGfr1sYAcVSeL1u1x7LFzUBQYqfV1RWqpu8oqcNi/ETJseeA2ni2NsFdcetBT/U46GsrS9RIeu0y6siXMJ8AR8/L7AaliRbtWWiMb1N5JklmvQ6GQcfy+cBZihzHNOr7JcalcrZ2BDVtRRl+1EGa4g04kyZcTV1js46tTtgitY6MFjYWq4Hv7KiwUTnu93LZU1V297gFMPaaOAqyGBDgf/4sy9pe+3H3+ITGZuoxsGa/k70YmlUuKVb8y6XEUN7tIJm0HC9l7FPcfmaE3Qc80YNdNLBNgzZD8zgMJ4qXuQxf2Wts97AjO3XkJIayKiWe8IAfHddFhsF+tWUY6Pyc6b2fWEPZemISXJNnOPkjX5NTuKWRYKZ66qITOQF0zMWguQoYPcuwDKvp+mMKM6jF9DUeC80PYpMhZNrx3eRlvQNTXpz0wrwJwI2Q+FVXok3vz25yXQFDGnFN/mt39R/8BmD2AFG5IoqvcypWF5CPZfw='
|
24 |
+
|
25 |
+
nameStatement: b'aCuZ8Tx2pYRJ6+uXF1WC/WwCSSY1LsKw5x9b6oCvUtorklPEQ5lVFpaGMrn4E2OxXHJ5vyU1IU9AlW44hxLaIYUBFkhaMIwkJQJ/E5hI7UcGiHvWcdmGHrqoVa3DhS4Uh+cxPOEz9KMMfD1j9utHONpcdOmfPRh8tCvHW5FlZcnUJZ0gGTUFsZhWGuJvq50gLQyihuJXT0B6gATCAZ377n0RqM0NvW3D+pgQvy8uESJ7dD57thVDi2UVHK8ej0Vuf5ZHtlx0MI+EnypXmNXA1D7PpVaGVMDKf82KoiYJcglQc53IP7ycdqw3PDhHG5MRVkhTZ6C055X4bTgqqSxSoRnZwzVdB0INFpZOKRgJobPeYMrDzD3s3Pg0jLF5S1mgs06JuG9bjppgudgkVrSE0wZf1+j8kkfKUo/hFj7dCxmkX+CuI8XTbZNzX117eJ4+aoX5M1cojByRoiyDZDtE0FrgGyDJJR7s54zNSxdyiRbw1kONFw7JX0Qmjd0e0gS+KJjXr69uE63XA93jwhv5CmVb95z7iQx8FHKWM96p78Rek1NC9r4YLhtCWj6SC3EbYUVqnidbnp3+A2Y0GmnBmKXwgdmdFSVcOR8Cpmxn8N+6bwPWcOTnqSeXKshRn7Uu7+VhIjVwudtTUSYr4hjoPrFp2/VyNXB4c5jHtE5x/qk='
|
26 |
+
|
27 |
+
address: b'XhC2JH0Txcj5K43hH3D99ZMzheaMZMSjJ1sRcXEhWqayh4eBJHDxiT1ZcUaPvhLvxlsmEyw5Q6DCGFPDbC8TidKPwrh8zvam3PSdN13omGFlHuGLhAwmClduY+2dzgW0via6zRHOz5tfpe2Sj/2mszmLa7Yf8/GOwKmsP3y2DYB1av4T/fpwosuBT9hVZDXQdUhlBekHYbW4aWEJLP+1Fq6SfxVxnw7eRlSrNjBzdpD3K7qnJxJl/jtICd9LlDZAdNIUv48prGtQ+XdgFmQOG7y0C2agi23dLO+LamPBn3vpJBoZJpjt3wej4wyqXPL6sMKcB5HS/6qY7Thh8qiI4AgnmjfJUFIlJBKdm89hu/BnvZeF/Y1sS+uqPT85ZoMidXs/+z7xYZ0d1buPZO6vaJ0bSImbTKV8CbFgBg0mCYjcH5Da91NWQ1tM/BE8xYPFb68ODGcJSiuI4+6jnrypiwFUHvxMLSt+tR6Mo6+n8XFTx0gQuD40gUmFtCZ7SEcpIXfFZRh8IFLqaCLB6hQGj3TXevxcdAeDvAMreF/LeBCf3rD6txQoQWaSEgIFKKfLpyOXEG04EIF2rOfwUT7Xhx3zRs051UuN1oa8GE2saBJodvzVVJAf9y/utdV61qP3DpMUGNwb93j1rxwf9j4HPv8d7d27I8bTN7xXEVU+bho='
|
28 |
+
|
29 |
+
asset: b'TSzUXkCvG7LBDUNb1K3ZYe/mlNsLjH0ylTY2nzkquaPM48PXeXxR9vOtAKzKQYk7Oq50rcDEntMd76CcS+PNhVvXbj9JTevNtPRHU7KjPgBmmSvuiDf6nSYGA049Q4U9Zf6AjrJi/hyKxXmi9mP8BkCSRsKhc4SwKW34VglgE6Ou2COds1rQ5+evNeRKMSpNI0rq+x+n+LftDUIa7phcqr+jtBB7Gq2KXcPmCloZcaiKyPQANNEPc8ZN1e9GiTy9ra0vqhuZjFxQdhJmcr6fWER1Xd6UYA9hNR88e08ldyUXZokJ6ssi0uNl69yR1ktZxrn3i2auyKvaaxMGmujvH/W6xSNHAIfx5PuQh87PmEmosodpK6VPvyjP1a9rDuRYbjWLI/kJe/OKWF5fCOx8+pj64vB4RZVYD6ZavfIsynGpAoY9y+ORmldi8DDesfDke87hbGcNyKyuISeIAkiBZz7TaHFiAgmMIxxcmyjLc0BB5B8UTyIF6p/gZBEcjqLtcqFBxPgUrgk4+1IuNaN8AgCk4xbq5X9a/QQ8sb59/NfkTB+DI62IqvCiXDKukNL1FRjJmpCqj138qI6bAiNY7OH1HeQrCvZ4FbZC3+cFhclSfBcRIEvsvtzrOkHIrScrPAhX0c1UZhU9XW7QQGyZqXs25pv14dWTTExu7/FntLs='
|
30 |
+
|
31 |
+
liability: b'F24LCecm2GPaWKHAWjrYIftVNCbbPYmvcwsMjruKqKfChBYSGQy+nrCOrIgp0wL5Asf4NF1qDuNeeQI5DuT4jTRxe5rR2Yxlqvzodhf6aGHWxzPz1VGZLaN7THRRq0EjL7ixNXz/6TeRKktP3Grhi+Jk5LUtGhIdUySGG1N96CINtE7a1zKfZBasyLyZ8PBqwYjOtSNX5naTAqPGCI6eGjqIY7fSKjb82EML8Xe+6uZVqqR2aNv2u8yzq8nglHH0J7i068SwpnDBT5jjdEH3pbyTsiLnmdHYQ8WGKG2SetSIMfxkkJqmCErbqH3F1uN/grzFsah8fHG6W/479yPcGsCnIFeUeCzPAihhHvUI21V39ADT9iG9V9sIXzPmo1BKT9JjKooRi+9+beOZT4YvjZz9pNbXanYL+mNW2qwimLDc3L4mrcoSQ0WlyXGefY2Sk8W9ePKtSrZRBIjzkRB10dAkicykCM0yts4sWxb2LMwFTJZRtXFV9Ay6xkiFx8mqeWHlZRThGx4aLcibIzTrwLBUzK3XeypUR/6ukpJvvaoDLlyoBQ1L+SE0lU0Wz8EDBYWMIrfw2dwP5jpT9ho0c+/bILr9DL93+1AVfwYqG8Uw1rVPPvLsZyOTIZqrUI2k8qZnsqIED41H9msI23YOVc665W/mvcafasjnQ9oTqL0='
|
32 |
+
|
33 |
+
statementDate: b'F0hBPc79PlZ3TE+jEY42dYDYQ3dzb5JrpLYmJaFgZS7Bumb7NjuyRKPPwLdILtYHfoSK2ZBQXLmplLY9J2k5UzMFwgQj/Oc/9tOJA6uHqCI0FjAx62Cu3PnECokXvKA1Cm1rNsY1IDt212eC5ghiiMF3pdKBW7jElIBsuDjJavtCnTF52QyXs7eVhAA0ZxhI4t0aP9TKEZINiGteczj36Of+UDgej3QR9/uK7Ds+FJFEQhMKWzNUM22AuXGGvhceAKiK2heOez8znCEtp/vvTRvYicYmRbNweQGjtrhCEVMeQ4UTGlOEqploOHuZ0ykU0TIUmjIPzPLUZsaKj8a4HK+ecEIntUWwCDtD76DHcbJ+ZzLLFYSfiIPTBTS88Pul2HqgnqilRufui1A6GEcBHluHQpnPrFth/FMB4Xuia8AvIkA5TfuKAneoq2Lwo3kD4/OrixfboF1d0NKm/tGmKhiCIWmhW6D1MtZv86rlVzsFrSS5UxjIQyHTuIYprsNerF7C0Uqy6llmdKkti2M9IJKc9T8pqReKevYmWxkkaQ9xd/orqpG9bPHYn3bcgj60FQ5x4zaytfF2rdHr6sTWCTfTb/Iz+yWBzmXS4kdV/V5lTPuGfp4uFyPI72nhPgGtQ6OCLuWkEmgjBK6VcZALY0TefZ8fByfsVLRNKJgnvVQ='
|
34 |
|
similarity_check.py
CHANGED
@@ -2,10 +2,15 @@ from model1 import model1
|
|
2 |
from model2 import model2
|
3 |
import checkTool as ct
|
4 |
import extract_pdf as pf
|
|
|
5 |
|
6 |
# get info from hkid card
|
7 |
|
8 |
def string_similarity(s1, s2): # Levenshtein distance algorithm
|
|
|
|
|
|
|
|
|
9 |
if s1 == s2:
|
10 |
return 100.0
|
11 |
|
@@ -32,7 +37,7 @@ def string_similarity(s1, s2): # Levenshtein distance algorithm
|
|
32 |
similarity = (1 - matrix[len1][len2] / max(len1, len2)) * 100
|
33 |
return round(similarity, 1)
|
34 |
|
35 |
-
def get_data(img1_path, img2_path
|
36 |
|
37 |
# img_fp = 'IMG_4495.jpg'
|
38 |
|
@@ -58,8 +63,9 @@ def get_data(img1_path, img2_path, file_name):
|
|
58 |
# name = name.replace(' ', '')
|
59 |
# name = name.lower()
|
60 |
|
61 |
-
data = pf.get_info_from_bank(img2_path, file_name)
|
62 |
-
|
|
|
63 |
|
64 |
|
65 |
############# Similarity check ##############
|
|
|
2 |
from model2 import model2
|
3 |
import checkTool as ct
|
4 |
import extract_pdf as pf
|
5 |
+
import extraction_data as ed
|
6 |
|
7 |
# get info from hkid card
|
8 |
|
9 |
def string_similarity(s1, s2): # Levenshtein distance algorithm
|
10 |
+
s1 = s1.replace(' ', '')
|
11 |
+
s1 = s1.lower()
|
12 |
+
s2 = s2.replace(' ', '')
|
13 |
+
s2 = s2.lower()
|
14 |
if s1 == s2:
|
15 |
return 100.0
|
16 |
|
|
|
37 |
similarity = (1 - matrix[len1][len2] / max(len1, len2)) * 100
|
38 |
return round(similarity, 1)
|
39 |
|
40 |
+
def get_data(img1_path, img2_path):
|
41 |
|
42 |
# img_fp = 'IMG_4495.jpg'
|
43 |
|
|
|
63 |
# name = name.replace(' ', '')
|
64 |
# name = name.lower()
|
65 |
|
66 |
+
# data = pf.get_info_from_bank(img2_path, file_name)
|
67 |
+
data = ed.get_info_from_bank(img2_path)
|
68 |
+
name = data["nameStatement"]
|
69 |
|
70 |
|
71 |
############# Similarity check ##############
|
test.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
import streamlit
|
|
|
2 |
|
3 |
-
print(
|
|
|
1 |
import streamlit
|
2 |
+
import mediapipe
|
3 |
|
4 |
+
print(mediapipe.__version__)
|
test_ocr.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import easyocr
|
2 |
+
reader = easyocr.Reader(['en'])
|
3 |
+
result = reader.readtext('hangseng_page-0001.jpg', detail = 0)
|
4 |
+
print(result)
|
webapp.py
CHANGED
@@ -11,12 +11,13 @@ import streamlit as st
|
|
11 |
import requests
|
12 |
import json
|
13 |
import request_json.sbt_request_generator as sbt
|
|
|
|
|
|
|
14 |
|
15 |
-
global data
|
16 |
-
data = {}
|
17 |
-
|
18 |
|
19 |
def main():
|
|
|
20 |
# st.title("SBT Web Application")
|
21 |
# today's date = get_today_date
|
22 |
|
@@ -30,22 +31,37 @@ def main():
|
|
30 |
"""
|
31 |
st.markdown(html_temp, unsafe_allow_html=True)
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
st.header("I. Similarity Check")
|
34 |
-
image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'], accept_multiple_files=True)
|
35 |
if len(image_file) == 1:
|
36 |
-
# print(image_file[0].name)
|
37 |
image1 = Image.open(image_file[0])
|
38 |
st.text("HKID card")
|
39 |
st.image(image1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
elif len(image_file) == 2:
|
41 |
image1 = Image.open(image_file[0])
|
42 |
st.text("HKID card")
|
43 |
st.image(image1)
|
44 |
image2 = Image.open(image_file[1])
|
45 |
-
|
|
|
|
|
46 |
st.text("Bank statement")
|
47 |
st.image(image2)
|
48 |
-
|
|
|
49 |
# if image_file2 is not None:
|
50 |
# image2 = Image.open(image_file)
|
51 |
# st.text("Bank statement")
|
@@ -60,15 +76,20 @@ def main():
|
|
60 |
if st.button("Recognise"):
|
61 |
with st.spinner('Wait for it...'):
|
62 |
# global data
|
63 |
-
data = sc.get_data(image1, image2
|
64 |
-
|
65 |
-
with open('data1.txt', 'w') as f:
|
66 |
-
|
67 |
# data.update(sc.get_data(image1, image2, file_name))
|
68 |
-
print(f'data inside {data}')
|
69 |
# sbt.split_data(data)
|
|
|
|
|
70 |
st.success('Done!')
|
71 |
-
|
|
|
|
|
|
|
72 |
#print(score)
|
73 |
st.text(f'score: {score}')
|
74 |
if (score>85):
|
@@ -76,6 +97,8 @@ def main():
|
|
76 |
else:
|
77 |
st.text(f'unmatched')
|
78 |
|
|
|
|
|
79 |
st.header("IIa. HKID Data Extraction")
|
80 |
st.text(f'Name: {data["name_on_id"]}') # name is without space
|
81 |
st.text(f'HKID: {data["hkid"]} and validity: {data["validity"]}')
|
@@ -83,16 +106,21 @@ def main():
|
|
83 |
|
84 |
st.header("IIb. Bank Statement Data Extraction")
|
85 |
# st.write('------------From bank statement------------')
|
86 |
-
st.text(f'Name: {data["
|
87 |
st.text(f'Address: {data["address"]}')
|
88 |
st.text(f'Bank: {data["bank"]}')
|
89 |
-
st.text(f'Date: {data["
|
90 |
-
st.text(f'Asset: {data["
|
91 |
-
st.text(f'Liabilities: {data["
|
92 |
# result_img= detect_faces(our_image)
|
93 |
# st.image(result_img)
|
94 |
# print(f'data outside 1 {data}')
|
|
|
|
|
|
|
|
|
95 |
|
|
|
96 |
st.header("II. Facial Recognition")
|
97 |
run = st.checkbox('Run')
|
98 |
|
@@ -127,18 +155,27 @@ def main():
|
|
127 |
print(score)
|
128 |
if len(score) > 20:
|
129 |
avg_score = sum(score) / len(score)
|
130 |
-
st.write(
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
|
138 |
# update_text(f'{demo.convert_distance_to_percentage(score, 0.45)}')
|
139 |
else:
|
140 |
st.write('Stopped')
|
141 |
-
|
142 |
|
143 |
# print(f'the data is {data}')
|
144 |
|
@@ -149,7 +186,7 @@ def main():
|
|
149 |
|
150 |
# st.header("IIIb. Bank Statement Data Extraction")
|
151 |
# # st.write('------------From bank statement------------')
|
152 |
-
# st.text(f'Name: {data["
|
153 |
# st.text(f'Address: {data["address"]}')
|
154 |
# st.text(f'Bank: {data["bank"]}')
|
155 |
# st.text(f'Date: {data["date"]}')
|
@@ -160,7 +197,7 @@ def main():
|
|
160 |
if st.button("Confirm"):
|
161 |
# print(f'data outside 3 {data}')
|
162 |
with st.spinner('Sending data...'):
|
163 |
-
sbt.split_data(data)
|
164 |
st.success('Done!')
|
165 |
|
166 |
if __name__ == '__main__':
|
|
|
11 |
import requests
|
12 |
import json
|
13 |
import request_json.sbt_request_generator as sbt
|
14 |
+
import pathlib
|
15 |
+
import os
|
16 |
+
import check_hkid_validity as chv
|
17 |
|
|
|
|
|
|
|
18 |
|
19 |
def main():
|
20 |
+
|
21 |
# st.title("SBT Web Application")
|
22 |
# today's date = get_today_date
|
23 |
|
|
|
31 |
"""
|
32 |
st.markdown(html_temp, unsafe_allow_html=True)
|
33 |
|
34 |
+
if 'hkid_image_validity' not in st.session_state:
|
35 |
+
st.session_state.hkid_image_validity = False
|
36 |
+
|
37 |
+
if 'data' not in st.session_state:
|
38 |
+
st.session_state['data'] = {}
|
39 |
+
|
40 |
st.header("I. Similarity Check")
|
41 |
+
image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg', 'pdf'], accept_multiple_files=True)
|
42 |
if len(image_file) == 1:
|
|
|
43 |
image1 = Image.open(image_file[0])
|
44 |
st.text("HKID card")
|
45 |
st.image(image1)
|
46 |
+
image1.save('image/hkid.jpg', 'JPEG')
|
47 |
+
if chv.check_hkid('image/hkid.jpg'):
|
48 |
+
st.text("Valid HKID card.")
|
49 |
+
st.session_state.hkid_image_validity = True
|
50 |
+
else:
|
51 |
+
st.text("Invalid HKID card. Please upload again!")
|
52 |
+
st.session_state.hkid_image_validity = False
|
53 |
elif len(image_file) == 2:
|
54 |
image1 = Image.open(image_file[0])
|
55 |
st.text("HKID card")
|
56 |
st.image(image1)
|
57 |
image2 = Image.open(image_file[1])
|
58 |
+
# image2 = image_file[1]
|
59 |
+
# image2.save('image/hkid.jpg', 'JPEG')
|
60 |
+
# file_name = image_file[1].name
|
61 |
st.text("Bank statement")
|
62 |
st.image(image2)
|
63 |
+
|
64 |
+
print(f"the id is: {st.session_state.hkid_image_validity}")
|
65 |
# if image_file2 is not None:
|
66 |
# image2 = Image.open(image_file)
|
67 |
# st.text("Bank statement")
|
|
|
76 |
if st.button("Recognise"):
|
77 |
with st.spinner('Wait for it...'):
|
78 |
# global data
|
79 |
+
data = sc.get_data(image1, image2)
|
80 |
+
# data = ed.get_info_from_bank('hsbc_one_account.pdf')
|
81 |
+
# with open('data1.txt', 'w') as f:
|
82 |
+
# f.write(json.dumps(data))
|
83 |
# data.update(sc.get_data(image1, image2, file_name))
|
84 |
+
# print(f'data inside {data}')
|
85 |
# sbt.split_data(data)
|
86 |
+
if 'data' in st.session_state:
|
87 |
+
st.session_state['data'] = data
|
88 |
st.success('Done!')
|
89 |
+
# if "similarity_score" not in data.keys():
|
90 |
+
# data["similarity_score"] = "0"
|
91 |
+
score = int(st.session_state['data']['similarity_score'])
|
92 |
+
# score = int(data["similarity_score"])
|
93 |
#print(score)
|
94 |
st.text(f'score: {score}')
|
95 |
if (score>85):
|
|
|
97 |
else:
|
98 |
st.text(f'unmatched')
|
99 |
|
100 |
+
|
101 |
+
data = st.session_state['data']
|
102 |
st.header("IIa. HKID Data Extraction")
|
103 |
st.text(f'Name: {data["name_on_id"]}') # name is without space
|
104 |
st.text(f'HKID: {data["hkid"]} and validity: {data["validity"]}')
|
|
|
106 |
|
107 |
st.header("IIb. Bank Statement Data Extraction")
|
108 |
# st.write('------------From bank statement------------')
|
109 |
+
st.text(f'Name: {data["nameStatement"]}')
|
110 |
st.text(f'Address: {data["address"]}')
|
111 |
st.text(f'Bank: {data["bank"]}')
|
112 |
+
st.text(f'Date: {data["statementDate"]}')
|
113 |
+
st.text(f'Asset: {data["totalAsset"]} hkd')
|
114 |
+
st.text(f'Liabilities: {data["totalLiability"]} hkd')
|
115 |
# result_img= detect_faces(our_image)
|
116 |
# st.image(result_img)
|
117 |
# print(f'data outside 1 {data}')
|
118 |
+
|
119 |
+
if 'data' in st.session_state:
|
120 |
+
tempout = st.session_state['data']
|
121 |
+
print(f'hello: {tempout}')
|
122 |
|
123 |
+
|
124 |
st.header("II. Facial Recognition")
|
125 |
run = st.checkbox('Run')
|
126 |
|
|
|
155 |
print(score)
|
156 |
if len(score) > 20:
|
157 |
avg_score = sum(score) / len(score)
|
158 |
+
st.write(avg_score)
|
159 |
+
# st.write(f'{demo.convert_distance_to_percentage(avg_score, 0.45)}')
|
160 |
+
camera.release()
|
161 |
+
run = not run
|
162 |
+
st.session_state['data']['avg_score'] = str(avg_score)
|
163 |
+
# with open('data1.txt', 'r') as f:
|
164 |
+
# if f is not None:
|
165 |
+
# data_raw = f.read()
|
166 |
+
# data = json.loads(data_raw)
|
167 |
+
# data['avg_score'] = str(avg_score)
|
168 |
+
# else:
|
169 |
+
# data = {}
|
170 |
+
|
171 |
+
|
172 |
+
# with open('data1.txt', 'w') as f:
|
173 |
+
# f.write(json.dumps(data))
|
174 |
|
175 |
|
176 |
# update_text(f'{demo.convert_distance_to_percentage(score, 0.45)}')
|
177 |
else:
|
178 |
st.write('Stopped')
|
|
|
179 |
|
180 |
# print(f'the data is {data}')
|
181 |
|
|
|
186 |
|
187 |
# st.header("IIIb. Bank Statement Data Extraction")
|
188 |
# # st.write('------------From bank statement------------')
|
189 |
+
# st.text(f'Name: {data["nameStatement"]}')
|
190 |
# st.text(f'Address: {data["address"]}')
|
191 |
# st.text(f'Bank: {data["bank"]}')
|
192 |
# st.text(f'Date: {data["date"]}')
|
|
|
197 |
if st.button("Confirm"):
|
198 |
# print(f'data outside 3 {data}')
|
199 |
with st.spinner('Sending data...'):
|
200 |
+
sbt.split_data(st.session_state['data'])
|
201 |
st.success('Done!')
|
202 |
|
203 |
if __name__ == '__main__':
|