|
import os |
|
import yfinance as yf |
|
import pandas as pd |
|
from newsapi import NewsApiClient |
|
from transformers import pipeline |
|
import tensorflow as tf |
|
from tensorflow import keras |
|
from sklearn.preprocessing import MinMaxScaler |
|
import numpy as np |
|
from datetime import datetime, timedelta |
|
import alpaca_trade_api as tradeapi |
|
from langchain_community.llms import HuggingFaceEndpoint |
|
import logging |
|
|
|
|
|
# --- Logging -----------------------------------------------------------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Credentials -------------------------------------------------------------
# Read keys strictly from the environment. The previous version supplied
# placeholder defaults ('your_newsapi_key', ...), which are truthy strings,
# so the all() validation below could never fail and the script would run
# with garbage credentials. A missing variable now yields None and is caught.
NEWSAPI_KEY = os.getenv('NEWSAPI_KEY')
ALPACA_API_KEY = os.getenv('ALPACA_API_KEY')
ALPACA_SECRET_KEY = os.getenv('ALPACA_SECRET_KEY')
# APCA_* are the canonical Alpaca SDK variable names; fall back to the
# ALPACA_* spellings so either naming convention works.
APCA_API_KEY_ID = os.getenv('APCA_API_KEY_ID', ALPACA_API_KEY)
APCA_API_SECRET_KEY = os.getenv('APCA_API_SECRET_KEY', ALPACA_SECRET_KEY)

if not all([NEWSAPI_KEY, APCA_API_KEY_ID, APCA_API_SECRET_KEY]):
    raise ValueError("Ensure all API keys and secret keys are set as environment variables.")

# --- External clients --------------------------------------------------------
newsapi = NewsApiClient(api_key=NEWSAPI_KEY)

HF_TOKEN = os.getenv("HF_TOKEN")
llm = HuggingFaceEndpoint(
    repo_id="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
    # NOTE(review): "financial-analysis" is not a standard HF inference task;
    # this repo is a sequence-classification model, so the endpoint likely
    # expects "text-classification" — confirm before relying on `llm`.
    task="financial-analysis",
    max_new_tokens=512,
    top_k=5,
    temperature=0.2,
    repetition_penalty=1.03,
    timeout=300,
    huggingfacehub_api_token=HF_TOKEN,
)

# Paper-trading endpoint — orders placed through this client move no real money.
alpaca_api = tradeapi.REST(APCA_API_KEY_ID, APCA_API_SECRET_KEY, base_url='https://paper-api.alpaca.markets')
|
|
|
def collect_market_data(ticker):
    """Download the last 365 days of daily OHLCV data for *ticker* and save
    it to ``{ticker}_market_data.csv``.

    Raises:
        ValueError: if yfinance returns no rows (unknown ticker, network
            failure) — previously an empty CSV was written silently and the
            pipeline crashed later inside model training instead.
    """
    end = datetime.now()
    start = end - timedelta(days=365)
    data = yf.download(ticker, start=start.strftime('%Y-%m-%d'), end=end.strftime('%Y-%m-%d'))
    if data.empty:
        raise ValueError(f'No market data returned for {ticker}.')
    data.to_csv(f'{ticker}_market_data.csv')
    logger.info(f'Market data for {ticker} collected successfully.')
|
|
|
def collect_news_data(query, from_date, to_date):
    """Fetch English-language articles matching *query* between *from_date*
    and *to_date* (both 'YYYY-MM-DD') and save them to ``news_data.csv``.

    Failures are logged rather than propagated (best-effort, matching
    perform_sentiment_analysis): a raised client/network exception and an
    error-status response are both handled. The original indexed
    ``all_articles["message"]`` directly, which raises KeyError when the
    error payload lacks that field.
    """
    try:
        all_articles = newsapi.get_everything(q=query, from_param=from_date, to=to_date, language='en', sort_by='relevancy')
    except Exception as e:
        logger.error(f'Error collecting news data: {e}')
        return

    if all_articles.get('status') == 'ok':
        pd.DataFrame(all_articles['articles']).to_csv('news_data.csv')
        logger.info(f'News data for {query} collected successfully.')
    else:
        # 'message' is only guaranteed on NewsAPI error responses; fall back
        # to dumping the whole payload so the failure is still diagnosable.
        logger.error(f'Error collecting news data: {all_articles.get("message", all_articles)}')
|
|
|
def perform_sentiment_analysis():
    """Annotate ``news_data.csv`` with a per-article sentiment label.

    Runs each article's 'description' through a Hugging Face
    sentiment-analysis pipeline and writes the frame (plus a 'sentiment'
    column) to ``sentiment_data.csv``. Missing descriptions get 'NEUTRAL'.
    Errors are logged, not raised, so a trading run can continue.

    Fixes vs. the original:
    - pipeline construction (which downloads the model) now sits inside the
      try block, so a download failure is logged instead of crashing;
    - long descriptions are truncated, since feeding text beyond the model's
      token limit (typically 512 tokens) raises at inference time.
    """
    try:
        news_data = pd.read_csv('news_data.csv')
        sentiment_pipeline = pipeline("sentiment-analysis")

        def _label(text):
            # NaN descriptions are common in NewsAPI results.
            if pd.isna(text):
                return 'NEUTRAL'
            # Truncate defensively at both the string and tokenizer level.
            return sentiment_pipeline(str(text)[:512], truncation=True)[0]['label']

        news_data['sentiment'] = news_data['description'].apply(_label)
        news_data.to_csv('sentiment_data.csv', index=False)
        logger.info('Sentiment analysis performed successfully.')
    except Exception as e:
        logger.error(f'Error performing sentiment analysis: {e}')
|
|
|
def train_price_prediction_model(ticker):
    """Train an LSTM next-day close-price model on ``{ticker}_market_data.csv``.

    Builds sliding 60-day windows of min-max-scaled closing prices, each
    predicting the following day's scaled close, and saves the fitted model
    to ``{ticker}_price_prediction_model.h5``.

    Raises:
        ValueError: when fewer than 61 rows are available — the original
            produced empty X/y arrays and died later with an opaque
            np.reshape error.
    """
    window = 60
    data = pd.read_csv(f'{ticker}_market_data.csv')
    data = data[['Date', 'Close']].set_index('Date')
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)

    if len(scaled_data) <= window:
        raise ValueError(f'Need more than {window} rows to train; got {len(scaled_data)}.')

    X = []
    y = []
    for i in range(window, len(scaled_data)):
        X.append(scaled_data[i - window:i, 0])
        y.append(scaled_data[i, 0])

    X = np.array(X)
    y = np.array(y)
    # LSTM expects (samples, timesteps, features).
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    model = keras.Sequential([
        keras.layers.LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)),
        keras.layers.LSTM(50, return_sequences=False),
        keras.layers.Dense(25),
        keras.layers.Dense(1)
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    # batch_size=1 / epochs=1 keeps the demo fast; increase both for real use.
    model.fit(X, y, batch_size=1, epochs=1)

    model.save(f'{ticker}_price_prediction_model.h5')
    logger.info('Price prediction model trained successfully.')
|
|
|
def make_trade_decision(ticker):
    """Predict tomorrow's close for *ticker* and place a 1-share market order.

    Loads the saved LSTM, scales the last 60 closes, predicts the next
    (scaled) close and maps it back to price space with the SAME fitted
    scaler. The original constructed a second, never-fitted MinMaxScaler
    for inverse_transform, which raises NotFittedError in scikit-learn, so
    this function could never complete.

    Buys 1 share if the predicted price exceeds the latest traded price,
    otherwise sells 1 share (GTC market order on the paper account).
    """
    model = keras.models.load_model(f'{ticker}_price_prediction_model.h5')
    data = pd.read_csv(f'{ticker}_market_data.csv')
    last_60_days = data['Close'].tail(60).values.reshape(-1, 1)

    # Fit once on the window and keep the scaler for the inverse mapping.
    scaler = MinMaxScaler(feature_range=(0, 1))
    last_60_days_scaled = scaler.fit_transform(last_60_days)

    # Shape to (1 sample, 60 timesteps, 1 feature) for the LSTM.
    X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], 1))

    predicted_scaled = model.predict(X_test)
    predicted_price = float(scaler.inverse_transform(predicted_scaled)[0, 0])

    # Most recent traded close. The original used .values[0], which on a
    # multi-row frame is the OLDEST bar; [-1] is correct and equivalent when
    # period='1d' returns a single row.
    current_price = float(yf.download(ticker, period='1d')['Close'].values[-1])

    side = 'buy' if predicted_price > current_price else 'sell'
    alpaca_api.submit_order(
        symbol=ticker,
        qty=1,
        side=side,
        type='market',
        time_in_force='gtc'
    )
    if side == 'buy':
        logger.info(f'Bought 1 share of {ticker}')
    else:
        logger.info(f'Sold 1 share of {ticker}')
|
|
|
if __name__ == "__main__":
    # End-to-end run for one symbol: collect market + news data, score
    # sentiment, train the price model, then place a paper trade.
    symbol = 'AAPL'
    news_query = 'Apple Inc'
    date_fmt = '%Y-%m-%d'
    window_start = (datetime.now() - timedelta(days=30)).strftime(date_fmt)
    window_end = datetime.now().strftime(date_fmt)

    collect_market_data(symbol)
    collect_news_data(news_query, window_start, window_end)
    perform_sentiment_analysis()
    train_price_prediction_model(symbol)
    make_trade_decision(symbol)
|
|