# Spaces: Sleeping
import base64 | |
import os | |
import rsa | |
from datetime import date | |
import secrets | |
import string | |
import requests | |
import json | |
from dotenv import load_dotenv | |
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
from selenium.webdriver.common.by import By | |
import time | |
from bs4 import BeautifulSoup | |
import csv | |
def generate_token_id(length):
    """Return a random token made of ``length`` ASCII letters and digits.

    Each character is drawn independently with ``secrets.choice``.
    Punctuation is not part of the alphabet. A ``length`` of zero or less
    yields an empty string.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(secrets.choice(alphabet))
    return ''.join(picked)
# Examples of what will be generated (letters and digits only; punctuation
# is not part of the character set)
# 5kbA9H2f1qT...
# Xe7uMr4d9Gh...
# w3yTb1c8Zpn...
# JpWqN756zXe...
# a9oVLs6Rd2m...
def get_today_date():
    """Return today's local date as an ISO 8601 string (``YYYY-MM-DD``)."""
    return date.today().isoformat()
# Example for what will be returned | |
# 2023-06-29 | |
def get_request(get_url, params, timeout=30):
    """Send an RSA-encrypted GET request and return the decoded JSON reply.

    Every value in ``params`` is UTF-8 encoded, encrypted with the public
    key read from ``../pubkey.pem`` (relative to this file) and then
    base64-encoded before being sent as a query-string parameter. The
    encrypted values are also written to ``sbt_request.txt`` in the current
    working directory, overwriting any previous dump.

    Args:
        get_url: URL of the endpoint to query.
        params: Mapping of parameter name to ``str`` value. The mapping is
            not modified; the encrypted values are placed in a new dict.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed as JSON.

    Raises:
        OSError: If the key file cannot be read or the dump file cannot be
            written.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    pubkey_path = os.path.join(os.path.dirname(__file__), '..', 'pubkey.pem')
    with open(pubkey_path, 'rb') as key_file:
        public_key = rsa.PublicKey.load_pkcs1(key_file.read())

    # Build a new dict so the caller's plaintext values are left untouched.
    # The base64 output is decoded to text so the dump file holds the plain
    # encoded value rather than a ``b'...'`` bytes repr; the bytes sent on
    # the wire are the same either way.
    encrypted_params = {
        key: base64.b64encode(
            rsa.encrypt(value.encode("utf-8"), public_key)
        ).decode("ascii")
        for key, value in params.items()
    }

    # Write the encrypted and encoded values to a file
    with open("sbt_request.txt", "w") as dump_file:
        for key, value in encrypted_params.items():
            dump_file.write(f"{key}: {value}\n\n")

    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(get_url, params=encrypted_params, timeout=timeout)
    data = response.json()
    print(f'get request: {data}')
    return data
def post_request(post_url, data, timeout=30):
    """Send an RSA-encrypted form POST and return the decoded JSON reply.

    Every value in ``data`` is UTF-8 encoded, encrypted with the public key
    read from ``../pubkey.pem`` (relative to this file) and then
    base64-encoded before being sent as a form field. The encrypted values
    are also written to ``sbt_request.txt`` in the current working
    directory, overwriting any previous dump.

    Args:
        post_url: URL of the endpoint to post to.
        data: Mapping of field name to ``str`` value. The mapping is not
            modified; the encrypted values are placed in a new dict.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed as JSON.

    Raises:
        OSError: If the key file cannot be read or the dump file cannot be
            written.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    pubkey_path = os.path.join(os.path.dirname(__file__), '..', 'pubkey.pem')
    with open(pubkey_path, 'rb') as key_file:
        public_key = rsa.PublicKey.load_pkcs1(key_file.read())

    # Build a new dict so the caller's plaintext values are left untouched.
    # The base64 output is decoded to text so the dump file holds the plain
    # encoded value rather than a ``b'...'`` bytes repr; the bytes sent on
    # the wire are the same either way.
    encrypted_data = {
        key: base64.b64encode(
            rsa.encrypt(value.encode("utf-8"), public_key)
        ).decode("ascii")
        for key, value in data.items()
    }

    # Write the encrypted and encoded values to a file
    with open("sbt_request.txt", "w") as dump_file:
        for key, value in encrypted_data.items():
            dump_file.write(f"{key}: {value}\n\n")

    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.post(post_url, data=encrypted_data, timeout=timeout)
    # ``json`` is a method: it has to be called, otherwise the bound-method
    # repr is printed instead of the server's reply.
    result = response.json()
    print(f'post request: {result}')
    return result
def search_on_engine(search_data):
    """Search for every value in ``search_data`` and record the links found.

    A Chrome WebDriver session opens the page whose URL is stored in the
    ``search-engine-url`` environment variable (loaded via ``load_dotenv``).
    For each value, the element named ``query`` is filled in, the element
    with id ``submit`` is clicked, and after a fixed wait the ``href`` of
    every ``<a>`` tag on the resulting page is collected. One row per value
    is appended to ``search_result.csv``, which is emptied at the start of
    each call.

    Args:
        search_data: Mapping whose values are the strings to search for.
            The keys are not used.

    Returns:
        A list with one ``{"tag": ..., "results": [...]}`` dict per searched
        value, in iteration order. ``results`` holds the raw ``href``
        values, so it contains ``None`` for anchors without an ``href``.

    Raises:
        AssertionError: If a result page contains "No results found.".
    """
    csv_filename = "search_result.csv"
    # Opening in write mode truncates rows left over from a previous run.
    with open(csv_filename, "w"):
        pass

    load_dotenv()
    search_url = os.environ.get("search-engine-url")

    collected = []
    driver = webdriver.Chrome()
    try:
        driver.get(search_url)
        for search_tag in search_data.values():
            # Type the tag into the query box and submit the form.
            query_box = driver.find_element(By.NAME, "query")
            query_box.clear()
            query_box.send_keys(search_tag)
            driver.find_element(By.ID, "submit").click()

            # Fixed 8-second wait for the results to load (original note:
            # 2 seconds for Yahoo! search, 6 seconds for Google search).
            time.sleep(8)

            # The search URL remains the same after searching, so the
            # results are read from the current page source.
            html = driver.page_source
            # Raised explicitly so the check still runs under ``python -O``,
            # which strips ``assert`` statements.
            if "No results found." in html:
                raise AssertionError(f"No results found for {search_tag!r}")

            soup = BeautifulSoup(html, 'html.parser')
            result_links = [link.get('href') for link in soup.find_all('a')]
            result_dict = {
                "tag": search_tag,
                "results": result_links,
            }

            # Append right away so rows for earlier tags survive a failure
            # on a later one.
            with open(csv_filename, 'a', encoding='UTF8', newline='') as f:
                writer = csv.DictWriter(f, fieldnames=["tag", "results"])
                writer.writerow(result_dict)

            collected.append(result_dict)
    finally:
        # Always shut the browser down, including when a lookup fails.
        driver.quit()

    return collected
def get_data_link(chinese_name, english_name, address):
    """Search for a person's details and return the search outcome.

    The names, the address and a phone number are bundled into one dict and
    handed to ``search_on_engine``.

    Args:
        chinese_name: The person's name in Chinese.
        english_name: The person's name in English.
        address: The person's address.

    Returns:
        Whatever ``search_on_engine`` returns for the assembled search data.
    """
    # TODO: look the phone number up in the db (e.g. via get_request) instead
    # of using this hard-coded placeholder.
    phone_no = '12345678'

    search_data = {
        "chi_name": chinese_name,
        "eng_name": english_name,
        "address": address,
        "phone_number": phone_no,
    }

    # Hand the result back to the caller instead of binding it to an unused
    # local and discarding it.
    return search_on_engine(search_data)
# get_data_link('劉柏政','Lau, Pak Ching','FLT 1939 18/F KM WAN HSE, CHOIHUNG EST 1 HUNG MUI AVENUE, NGAU CHI WAN KLN') | |
# search_on_engine('Lau, Pak Ching') |