Spaces:

OneFi
/

hf-similarity-check

Sleeping

hf-similarity-check / search_engine.py

Mitul Mohammad Abdullah Al Mukit

updates

e029c8d over 1 year ago

4.85 kB

	import base64
	import os
	import rsa
	from datetime import date
	import secrets
	import string
	import requests
	import json
	from dotenv import load_dotenv
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys
	from selenium.webdriver.common.by import By
	import time
	from bs4 import BeautifulSoup
	import csv

	def generate_token_id(length):
	    """Return a random token made of ``length`` characters.

	    Every character is picked independently with ``secrets.choice`` from the
	    ASCII letters and digits; punctuation is not part of the alphabet.
	    """
	    alphabet = string.ascii_letters + string.digits
	    picked = []
	    for _ in range(length):
	        picked.append(secrets.choice(alphabet))
	    return ''.join(picked)

	# Examples of what will be generated (ASCII letters and digits only, no punctuation)
	# 5kbA9H2f1q...
	# Xe7uM24d9w...
	# r3yTb1c8Zp...
	# QpWqN7e6zX...
	# a9oVhs6Rd2...

	def get_today_date():
	    """Return the current local date as an ISO 8601 string (``YYYY-MM-DD``)."""
	    return date.today().isoformat()

	# Example of what will be returned
	# 2023-06-29

	def get_request(get_url, params, timeout=30):
	    """Send a GET request whose query values are RSA-encrypted.

	    Each value in ``params`` is UTF-8 encoded, encrypted with the public key
	    read from ``../pubkey.pem`` (relative to this file) and base64-encoded.
	    The encrypted pairs are written to ``sbt_request.txt`` in the current
	    working directory and then sent as the query string of ``get_url``.

	    Args:
	        get_url: URL to request.
	        params: Mapping of parameter name to ``str`` value. It is left
	            unmodified; the encrypted values go into a new dict.
	        timeout: Seconds ``requests`` waits for the server before raising.

	    Returns:
	        The JSON-decoded response body (also printed to stdout).
	    """
	    # get_url = 'http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT'
	    pubkey_path = os.path.join(os.path.dirname(__file__), '..', 'pubkey.pem')
	    with open(pubkey_path, 'rb') as key_file:
	        pub_key = rsa.PublicKey.load_pkcs1(key_file.read())

	    # Build a fresh dict so the caller's plaintext values are not overwritten.
	    # Base64 output is pure ASCII, so it is decoded to str; keeping bytes would
	    # make the dump file below contain "b'...'" reprs.
	    encrypted_params = {}
	    for key, value in params.items():
	        cipher = rsa.encrypt(value.encode("utf-8"), pub_key)
	        encrypted_params[key] = base64.b64encode(cipher).decode("ascii")

	    # Write the encrypted and encoded values to a file
	    with open("sbt_request.txt", "w") as dump_file:
	        dump_file.writelines(
	            f"{key}: {value}\n\n" for key, value in encrypted_params.items()
	        )

	    # Send the encrypted values as query parameters and decode the JSON reply.
	    # Without a timeout a stalled server would block this call forever.
	    response = requests.get(get_url, params=encrypted_params, timeout=timeout)
	    data = response.json()
	    print(f'get request: {data}')
	    return data


	def post_request(post_url, data, timeout=30):
	    """Send a POST request whose form values are RSA-encrypted.

	    Each value in ``data`` is UTF-8 encoded, encrypted with the public key
	    read from ``../pubkey.pem`` (relative to this file) and base64-encoded.
	    The encrypted pairs are written to ``sbt_request.txt`` in the current
	    working directory and then posted as form data to ``post_url``.

	    Args:
	        post_url: URL to post to.
	        data: Mapping of field name to ``str`` value. It is left unmodified;
	            the encrypted values go into a new dict.
	        timeout: Seconds ``requests`` waits for the server before raising.

	    Returns:
	        The JSON-decoded response body (also printed to stdout).
	    """
	    # post_url = 'http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT'
	    pubkey_path = os.path.join(os.path.dirname(__file__), '..', 'pubkey.pem')
	    with open(pubkey_path, 'rb') as key_file:
	        pub_key = rsa.PublicKey.load_pkcs1(key_file.read())

	    # Build a fresh dict so the caller's plaintext values are not overwritten.
	    # Base64 output is pure ASCII, so it is decoded to str; keeping bytes would
	    # make the dump file below contain "b'...'" reprs.
	    encrypted_data = {}
	    for key, value in data.items():
	        cipher = rsa.encrypt(value.encode("utf-8"), pub_key)
	        encrypted_data[key] = base64.b64encode(cipher).decode("ascii")

	    # Write the encrypted and encoded values to a file
	    with open("sbt_request.txt", "w") as dump_file:
	        dump_file.writelines(
	            f"{key}: {value}\n\n" for key, value in encrypted_data.items()
	        )

	    # Post the encrypted form data to the API.
	    # Without a timeout a stalled server would block this call forever.
	    response = requests.post(post_url, data=encrypted_data, timeout=timeout)
	    # json is a method: it must be called, otherwise only the bound-method
	    # repr is printed instead of the response content.
	    payload = response.json()
	    print(f'post request: {payload}')
	    return payload


	def search_on_engine(search_data):
	    """Submit every value of ``search_data`` to the configured search page.

	    The page URL is read from the ``search-engine-url`` environment variable
	    (after loading a ``.env`` file). For each value the query form is filled
	    in and submitted, every ``href`` found on the resulting page is
	    collected, and one row ``{"tag": ..., "results": [...]}`` is written to
	    ``search_result.csv`` (the file is truncated first, no header row).

	    Args:
	        search_data: Mapping whose values are the search terms to submit.

	    Returns:
	        A list with one ``{"tag", "results"}`` dict per search term, in the
	        order they were searched.

	    Raises:
	        ValueError: If ``search-engine-url`` is not configured.
	        AssertionError: If a result page contains "No results found.".
	    """
	    csv_filename = "search_result.csv"

	    # Resolve configuration before touching the CSV or launching a browser,
	    # so a missing setting fails fast with a clear message.
	    load_dotenv()
	    search_url = os.environ.get("search-engine-url")
	    if not search_url:
	        raise ValueError('environment variable "search-engine-url" is not set')

	    collected_rows = []
	    # Opening with "w" truncates results of any previous run; the file stays
	    # open for the whole run instead of being reopened for every tag.
	    with open(csv_filename, 'w', encoding='UTF8', newline='') as csv_file:
	        writer = csv.DictWriter(csv_file, fieldnames=["tag", "results"])
	        # writer.writeheader()

	        # set up driver
	        driver = webdriver.Chrome()
	        try:
	            driver.get(search_url)

	            for search_tag in search_data.values():
	                # Searching search tag in query
	                query_box = driver.find_element(By.NAME, "query")
	                query_box.clear()
	                query_box.send_keys(search_tag)
	                driver.find_element(By.ID, "submit").click()
	                # Fixed wait for the result page. The original note budgets
	                # 2 seconds for Yahoo! search and 6 seconds for Google search.
	                time.sleep(8)

	                # search url remains the same after searching
	                html = driver.page_source
	                if "No results found." in html:
	                    # Raised explicitly (not via ``assert``) so the check is
	                    # still enforced when Python runs with -O.
	                    raise AssertionError(f"No results found for {search_tag!r}")

	                # extract links from page; anchors without an href are skipped
	                # so the result list never contains None.
	                soup = BeautifulSoup(html, 'html.parser')
	                result_links = [
	                    link.get('href') for link in soup.find_all('a', href=True)
	                ]

	                # store in csv
	                result_row = {"tag": search_tag, "results": result_links}
	                writer.writerow(result_row)
	                collected_rows.append(result_row)

	                # TODO: save the link to db
	                # post_request("post_url", result_link)
	        finally:
	            # Always shut the browser down, even when a search fails;
	            # otherwise the Chrome process is leaked.
	            driver.quit()

	    return collected_rows


	def get_data_link(chinese_name, english_name, address):
	    """Search the engine for a person's names, address and phone number.

	    Builds the search data set from the arguments plus a phone number and
	    hands it to ``search_on_engine``.

	    Args:
	        chinese_name: Name in Chinese, searched under the key ``chi_name``.
	        english_name: Name in English, searched under the key ``eng_name``.
	        address: Postal address, searched under the key ``address``.

	    Returns:
	        Whatever ``search_on_engine`` returns for the assembled data set.
	    """
	    # TODO: get phone number from db; a fixed placeholder is used until then.
	    # phone_no = get_request(get_url, english_name)
	    phone_no = '12345678'

	    # create data set
	    search_data = {
	        "chi_name": chinese_name,
	        "eng_name": english_name,
	        "address": address,
	        "phone_number": phone_no,
	    }

	    # Pass the data to the search engine and hand the outcome back to the
	    # caller instead of dropping it in an unused local.
	    return search_on_engine(search_data)

	# get_data_link('劉柏政','Lau, Pak Ching','FLT 1939 18/F KM WAN HSE, CHOIHUNG EST 1 HUNG MUI AVENUE, NGAU CHI WAN KLN')
	# search_on_engine('Lau, Pak Ching')