File size: 6,060 Bytes
1609dbf c5f202a b7f33b4 1609dbf 8dcb148 1609dbf b7f33b4 1609dbf cf27d1b b7f33b4 005b424 1609dbf 8dcb148 4b6a96c 1609dbf cf27d1b 4b6a96c 1609dbf 4b6a96c 1609dbf cf27d1b b7f33b4 005b424 1609dbf 8dcb148 1609dbf cf27d1b 4b6a96c 1609dbf 4b6a96c 1609dbf 4b6a96c 1609dbf 4b6a96c 1609dbf 4b6a96c 1609dbf 4b6a96c 1609dbf 4b6a96c 1609dbf cf27d1b 4b6a96c c5f202a 4b6a96c 1609dbf 4b6a96c 1609dbf cf27d1b 1609dbf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
import pandas as pd
import pytz
import cloudscraper
import yfinance as yf
from typing import List
from types import SimpleNamespace
def clean_etf_data(df):
"""
Clean ETF data
"""
# Copy original
df_original = df.copy()
# Set date as index
df_original["Date"] = pd.to_datetime(df_original["Date"])
# Format outflow to negative value
df = df.drop(columns="Date")
df.replace(to_replace=r"\(([0-9.]+)\)", value=r"-\1", regex=True, inplace=True)
# Replace '-' with 0
df.replace("-", 0, inplace=True)
# Convert from strings to numeric
df = df.apply(pd.to_numeric)
df["Date"] = df_original["Date"]
return df, df_original
def extract_date_index(df):
"""
Extract index from dataframe as Date
"""
# Convert Series to DataFrame
if isinstance(df, pd.Series):
df = df.to_frame()
df = df.reset_index(names="Date")
# Set date as index
df.Date = pd.to_datetime(df.Date)
return df
FETCH_HEADER = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Cache-Control": "max-age=0",
}
def scrape(url: str):
scraper = cloudscraper.create_scraper()
return scraper.get(url)
def fetch_btc_etf():
url = "https://farside.co.uk/bitcoin-etf-flow-all-data/"
r = scrape(url)
print(r.status_code)
if r.status_code != 200:
print(r.content)
btc_etf_flow=pd.DataFrame.from_dict({"Date":["11 Jan 2024", "12 Jan 2024"], "Total": [0,0]})
btc_etf_flow, btc_etf_flow_original = clean_etf_data(btc_etf_flow)
return SimpleNamespace(
url=url,
flow=btc_etf_flow,
orig=btc_etf_flow_original,
funds=[],
)
# Get Bitcoin spot ETF history
btc_etf_flow = pd.read_html(
r.content,
attrs={"class": "etf"},
skiprows=[1],
)[0]
# Remove summary lines
btc_etf_flow = btc_etf_flow.iloc[:-4]
# Extract symbols of ETF funds
btc_etf_funds = btc_etf_flow.drop(columns=["Date", "Total"]).columns.to_list()
btc_etf_flow, btc_etf_flow_original = clean_etf_data(btc_etf_flow)
return SimpleNamespace(
url=url,
flow=btc_etf_flow,
orig=btc_etf_flow_original,
funds=btc_etf_funds,
)
def fetch_eth_etf():
url = "https://farside.co.uk/ethereum-etf-flow-all-data/"
r = scrape(url)
print(r.status_code)
if r.status_code != 200:
print(r.content)
eth_etf_flow=pd.DataFrame.from_dict({"Date":["11 Jan 2024", "12 Jan 2024"], "Total": [0,0]})
eth_etf_flow, eth_etf_flow_original = clean_etf_data(eth_etf_flow)
return SimpleNamespace(
url=url,
flow=eth_etf_flow,
orig=eth_etf_flow_original,
funds=[],
)
# Get Ethereum spot ETF history
eth_etf_flow = pd.read_html(
r.content,
attrs={"class": "etf"},
skiprows=[2, 3],
)[0]
# Drop column index level 2
eth_etf_flow.columns = eth_etf_flow.columns.droplevel(2)
# Extract symbols of ETF funds
eth_etf_funds = (
eth_etf_flow.drop(columns="Total").columns[1:].get_level_values(1).to_list()
)
# Merge multi-index columns
eth_etf_flow.columns = eth_etf_flow.columns.map(" - ".join)
# Name first column "Date"
eth_etf_flow.rename(
columns={
"Unnamed: 0_level_0 - Unnamed: 0_level_1": "Date",
"Total - Unnamed: 10_level_1": "Total",
},
inplace=True,
)
# Remove summary lines
eth_etf_flow = eth_etf_flow.iloc[:-1]
eth_etf_flow, eth_etf_flow_original = clean_etf_data(eth_etf_flow)
return SimpleNamespace(
url=url,
flow=eth_etf_flow,
orig=eth_etf_flow_original,
funds=eth_etf_funds,
)
def fetch_etf_volumes(funds: List[str], start_time=None):
etf_volumes = pd.DataFrame()
for fund in funds:
etf_volumes[fund] = yf.download(
str(fund),
interval="1d",
period="max",
start=start_time,
)["Volume"]
etf_volumes = extract_date_index(etf_volumes)
return etf_volumes
def fetch_asset_price(ticker: str, start_time=None):
price = yf.download(ticker, interval="1d", period="max", start=start_time)["Close"]
price = extract_date_index(price)
price.rename(columns={"Close": "Price"}, inplace=True)
return price
def fetch(asset):
if asset == "BTC":
df = fetch_btc_etf()
else:
df = fetch_eth_etf()
etf_flow, etf_funds, etf_url = df.flow, df.funds, df.url
tz = pytz.timezone("America/New_York")
etf_flow, etf_funds = df.flow, df.funds
tz = pytz.timezone("America/New_York")
start_time = tz.localize(etf_flow.Date[0])
etf_volumes = fetch_etf_volumes(etf_funds, start_time=start_time)
price = fetch_asset_price(f"{asset}-USD", start_time=start_time)
etf_flow_individual = etf_flow.drop(columns="Total")
etf_flow_total = etf_flow[["Date", "Total"]]
cum_flow_individual = etf_flow_individual.drop(columns="Date").cumsum()
cum_flow_individual["Date"] = etf_flow_individual.Date
cum_flow_total = pd.DataFrame(
{
"Date": etf_flow_total.Date,
"Total": etf_flow_total.Total.cumsum(),
}
)
return SimpleNamespace(
url=etf_url,
etf_flow=etf_flow,
etf_volumes=etf_volumes,
price=price,
etf_flow_individual=etf_flow_individual,
etf_flow_total=etf_flow_total,
cum_flow_individual=cum_flow_individual,
cum_flow_total=cum_flow_total,
)
|