esg_scraper / app.py
kkawamu1's picture
Add Google Finance
9146e95
raw
history blame
3.18 kB
import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Sample rows offered as a ready-made example in the UI: each entry pairs a
# ticker symbol with the company's display name.
example_tickers = [
    {"Ticker": symbol, "Company Name": company}
    for symbol, company in (
        ("PLD", "Prologis Inc"),
        ("PSA", "Public Storage"),
        ("O", "Realty Income Corp"),
        ("META", "Meta Platforms"),
        ("AMZN", "Amazon.com"),
        ("MSFT", "Microsoft Corporation"),
    )
]
def get_esg_from_yahoo_finance(row):
    """Add the Yahoo Finance ESG risk scores for ``row.Ticker`` to *row*.

    The ticker's ``/sustainability`` page is downloaded and every
    ``<div class="content svelte-y3c2sq">`` element is inspected; the text of
    its ``<h4>`` child is parsed as a float. The first four values are stored,
    in page order, under "Total ESG Risk Score", "Environmental Risk Score",
    "Social Risk Score" and "Governance Risk Score".

    A column is set to ``None`` whenever its value is unavailable: the ticker
    is blank/missing, the page has fewer than four score blocks, a block has
    no ``<h4>``, or the text is not numeric.

    Args:
        row: A record exposing a ``Ticker`` attribute and supporting item
            assignment (a DataFrame row when used through
            ``DataFrame.apply(..., axis=1)``).

    Returns:
        The same *row* object, with the four score columns set.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    from urllib.parse import quote

    score_columns = (
        "Total ESG Risk Score",
        "Environmental Risk Score",
        "Social Risk Score",
        "Governance Risk Score",
    )
    # Start from "no data" so every exit path leaves all four columns present.
    for column in score_columns:
        row[column] = None

    ticker = row.Ticker
    # Rows added in the UI but left empty have no ticker to look up.
    if pd.isna(ticker) or not str(ticker).strip():
        return row

    # This is a standard user-agent of Chrome browser running on Windows 10
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
    }
    html = requests.get(
        # The ticker is user-supplied, so escape it before splicing it into
        # the URL path.
        "https://finance.yahoo.com/quote/"
        + quote(str(ticker).strip(), safe="")
        + "/sustainability",
        headers=headers,
        # Without a timeout a stalled connection would block the app forever.
        timeout=30,
    ).text
    soup = BeautifulSoup(html, "html.parser")

    values = []
    for block in soup.find_all("div", {"class": "content svelte-y3c2sq"}):
        heading = block.find("h4")
        if heading is None:
            # Keep the slot so later scores stay aligned with their columns.
            values.append(None)
            continue
        try:
            values.append(float(heading.text.strip()))
        except ValueError:
            values.append(None)

    # zip() stops at the shorter sequence: extra blocks are ignored and
    # missing ones keep the None assigned above.
    for column, value in zip(score_columns, values):
        row[column] = value
    return row
def get_esg_score_from_google_finance(row):
    """Add the score scraped from Google Finance for ``row.Ticker`` to *row*.

    Google Finance quote URLs have the form ``.../quote/TICKER:EXCHANGE``.
    If the ticker already contains a colon it is used as given; otherwise
    ``TICKER:NASDAQ`` and then ``TICKER:NYSE`` are tried. For each candidate
    page the first ``<div class="IPIeJ">`` is located and the text of its
    first ``<div class="P6K39c">`` descendant is stored under "CDP Score".
    The first candidate that yields a value wins.

    NOTE(review): the selectors are obfuscated class names; that they pick
    out the CDP climate score panel is presumed from the column name —
    confirm against the live page.

    "CDP Score" is set to ``None`` when the ticker is blank/missing or no
    candidate page contains the expected elements.

    Args:
        row: A record exposing a ``Ticker`` attribute and supporting item
            assignment (a DataFrame row when used through
            ``DataFrame.apply(..., axis=1)``).

    Returns:
        The same *row* object, with the "CDP Score" column set.

    Raises:
        requests.RequestException: If an HTTP request fails or times out.
    """
    from urllib.parse import quote

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36"
    }
    # Default to "no data" so the column exists on every exit path.
    row["CDP Score"] = None

    ticker = row.Ticker
    # Rows added in the UI but left empty have no ticker to look up.
    if pd.isna(ticker) or not str(ticker).strip():
        return row

    symbol = str(ticker).strip()
    if ":" in symbol:
        # Caller already supplied "TICKER:EXCHANGE".
        candidates = [symbol]
    else:
        candidates = [f"{symbol}:{exchange}" for exchange in ("NASDAQ", "NYSE")]

    for candidate in candidates:
        html = requests.get(
            # Escape the user-supplied symbol but keep the ":" separator.
            "https://www.google.com/finance/quote/" + quote(candidate, safe=":"),
            headers=headers,
            timeout=30,
        ).text
        soup = BeautifulSoup(html, "html.parser")
        panel = soup.find("div", {"class": "IPIeJ"})
        if panel is None:
            continue
        value = panel.find("div", {"class": "P6K39c"})
        if value is not None:
            row["CDP Score"] = value.text
            break
    return row
# Tabular form of the sample tickers, offered as a clickable example in the UI.
example_input_data = pd.DataFrame(data=example_tickers)

# Single input component: the table of tickers to look up.
inputs = [
    gr.Dataframe(
        label="Input Data",
        # NOTE(review): 1 is passed where a boolean flag is presumably
        # expected — confirm against the Gradio Dataframe documentation.
        interactive=1,
        col_count=(1, "dynamic"),
        row_count=(6, "dynamic"),
    )
]

# Single output component: seven fixed columns — the two input columns
# followed by the five score columns written by the scraper functions.
outputs = [
    gr.Dataframe(
        label="ESG Scores",
        headers=[
            "Ticker",
            "Company Name",
            "Total ESG Risk Score",
            "Environmental Risk Score",
            "Social Risk Score",
            "Governance Risk Score",
            "CDP Score",
        ],
        col_count=(7, "fixed"),
        row_count=(6, "dynamic"),
    )
]
def get_esg_scores(input_dataframe):
    """Enrich every row of *input_dataframe* with scraped ESG scores.

    Each row is passed first through the Yahoo Finance scraper and then
    through the Google Finance scraper, via ``DataFrame.apply(..., axis=1)``.

    Args:
        input_dataframe: Table with one row per company to look up.

    Returns:
        The table produced by applying both scrapers row by row.
    """
    scrapers = (
        get_esg_from_yahoo_finance,
        get_esg_score_from_google_finance,
    )
    enriched = input_dataframe
    for scraper in scrapers:
        enriched = enriched.apply(scraper, axis=1)
    return enriched
# Assemble the web UI around get_esg_scores and start serving it.
demo = gr.Interface(
    title="🌳ESG Data Scraper🌳\n\nIt scrapes ESG ratings from Yahoo Finance and Google Finance!",
    fn=get_esg_scores,
    inputs=inputs,
    outputs=outputs,
    # One example: the first six rows of the sample ticker table.
    examples=[[example_input_data.head(6)]],
)
demo.launch()