# os.system("pip install langchain-openai")
from langchain_openai import AzureChatOpenAI
import os
import pdfplumber
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain.prompts import PromptTemplate
import logging
import json
from typing import List
import mimetypes
import validators
import requests
import tempfile
from langchain.chains import create_extraction_chain
from GoogleNews import GoogleNews
import pandas as pd
import gradio as gr
import re
from langchain_community.document_loaders import WebBaseLoader
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from transformers import pipeline
import plotly.express as px
from langchain_community.document_loaders import CSVLoader
from langchain_community.chat_models import ChatOpenAI
from langchain.chains.llm import LLMChain
import yfinance as yf
import nltk
from nltk.tokenize import sent_tokenize
from openai import AzureOpenAI
from datetime import date, timedelta


class KeyValueExtractor:

    def __init__(self):
        """
        Initialize the KeyValueExtractor object.

        Sets the zero-shot classification model name and builds an Azure
        OpenAI client from the AZURE_OPENAI_KEY / AZURE_OPENAI_ENDPOINT
        environment variables.
        """
        self.model = "facebook/bart-large-mnli"
        self.client = AzureOpenAI(
            api_key=os.getenv("AZURE_OPENAI_KEY"),
            api_version="2023-07-01-preview",
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        )

    def get_url(self, keyword):
        # Build the Yahoo Finance quote URL for the given ticker symbol
        return f"https://finance.yahoo.com/quote/{keyword}?p={keyword}"

    def get_each_link_summary(self, url):
        loader = WebBaseLoader(url)
        docs = loader.load()
        text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=3000, chunk_overlap=200
        )
        # Split the documents into chunks
        split_docs = text_splitter.split_documents(docs)

        # Prepare the prompt template for summarization
        prompt_template = """The given text contains finance stock details for one company. Extract values for:
        Previous Close : [value]
        Open : [value]
        Bid : [value]
        Ask : [value]
        Day's Range : [value]
        52 Week Range : [value]
        Volume : [value]
        Avg. Volume : [value]
        Market Cap : [value]
        Beta (5Y Monthly) : [value]
        PE Ratio (TTM) : [value]
        EPS (TTM) : [value]
        Earnings Date : [value]
        Forward Dividend & Yield : [value]
        Ex-Dividend Date : [value]
        1y Target Est : [value]
        and write an abstractive summary about those details:
        Given Text: {text}
        CONCISE SUMMARY:"""
        prompt = PromptTemplate.from_template(prompt_template)

        # Prepare the template for refining the summary with additional context
        refine_template = (
            "Your job is to produce a final summary.\n"
            "We have provided an existing summary up to a certain point: {existing_answer}\n"
            "We have the opportunity to refine the existing summary "
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{text}\n"
            "------------\n"
            "Given the new context, refine the original summary. "
            "If the context isn't useful, return the original summary."
        )
        refine_prompt = PromptTemplate.from_template(refine_template)

        # Load the summarization chain using the Azure ChatOpenAI language model
        chain = load_summarize_chain(
            llm=AzureChatOpenAI(azure_deployment="GPT-3"),
            chain_type="refine",
            question_prompt=prompt,
            refine_prompt=refine_prompt,
            return_intermediate_steps=True,
            input_key="input_documents",
            output_key="output_text",
        )

        # Generate the refined summary using the loaded summarization chain
        result = chain({"input_documents": split_docs}, return_only_outputs=True)
        print(result["output_text"])
        return result["output_text"]
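    # A minimal usage sketch for the refine-chain summarizer above (illustrative,
    # not executed on import). It assumes AZURE_OPENAI_KEY / AZURE_OPENAI_ENDPOINT
    # are set and that an Azure deployment named "GPT-3" exists; "MSFT" is just an
    # example ticker:
    #
    #   extractor = KeyValueExtractor()
    #   url = extractor.get_url("MSFT")   # -> https://finance.yahoo.com/quote/MSFT?p=MSFT
    #   page_summary = extractor.get_each_link_summary(url)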
    def one_day_summary(self, content) -> str:
        """
        Generate a detailed summary of the given finance details, covering how
        the stock moved compared to the previous day and whether the picture
        looks bullish or bearish.
        """
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"I want a detailed summary of the given finance details: how the stock performed today compared to the previous day, whether that is good or bad, and whether the outlook is bullish or bearish. The content is in backticks. ```{content}```."}
        ]
        # Call the Azure OpenAI chat completions endpoint
        chat_completion = self.client.chat.completions.create(
            model="GPT-3",
            messages=conversation,
            max_tokens=1000,
            temperature=0
        )
        response = chat_completion.choices[0].message.content
        return response

    def extract_key_value_pair(self, content) -> str:
        """
        Extract key-value pairs from the refined summary.

        Returns the extracted key-value pairs as model-generated text.
        """
        try:
            conversation = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Get the maximum number of meaningful key-value pairs. The content is in backticks. ```{content}```."}
            ]
            # Call the Azure OpenAI chat completions endpoint
            chat_completion = self.client.chat.completions.create(
                model="GPT-3",
                messages=conversation,
                max_tokens=1000,
                temperature=0
            )
            response = chat_completion.choices[0].message.content
            return response

        except Exception as e:
            # If an error occurs during the key-value extraction process, log the error
            logging.error(f"Error while extracting key-value pairs: {e}")
            print("Error:", e)
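    # Sketch of the daily pipeline built from the two methods above (hypothetical,
    # same Azure assumptions as the earlier sketch; `page_summary` would come from
    # get_each_link_summary):
    #
    #   day_summary = extractor.one_day_summary(page_summary)
    #   pairs = extractor.extract_key_value_pair(day_summary)
    #   # `pairs` is free-form model text, e.g. "Previous Close : 415.20\nOpen : 417.00"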
    def analyze_sentiment_for_graph(self, text):
        # Zero-shot classification over the three sentiment labels
        pipe = pipeline("zero-shot-classification", model=self.model)
        labels = ["Positive", "Negative", "Neutral"]
        result = pipe(text, labels)
        sentiment_scores = {
            result['labels'][0]: result['scores'][0],
            result['labels'][1]: result['scores'][1],
            result['labels'][2]: result['scores'][2]
        }
        return sentiment_scores

    def display_graph(self, text):
        sentiment_scores = self.analyze_sentiment_for_graph(text)
        labels = list(sentiment_scores.keys())
        scores = list(sentiment_scores.values())
        fig = px.bar(x=scores, y=labels, orientation='h', color=labels,
                     color_discrete_map={"Negative": "red", "Positive": "green", "Neutral": "gray"})
        # Scores are probabilities in [0, 1], so display them as plain numbers
        fig.update_traces(texttemplate='%{x:.2f}', textposition='outside')
        fig.update_layout(title="Sentiment Analysis", width=800)
        return fig

    def get_finance_data(self, symbol):
        # Define a rolling one-year date range ending today
        end_date = date.today()
        start_date = end_date - timedelta(days=365)

        # Fetch historical OHLC data using yfinance
        data = yf.download(symbol, start=start_date, end=end_date)

        # Select only the OHLC columns
        ohlc_data = data[['Open', 'High', 'Low', 'Close']]
        csv_path = "ohlc_data.csv"

        # Save the OHLC data to a CSV file
        ohlc_data.to_csv(csv_path)
        return csv_path

    def csv_to_dataframe(self, csv_path):
        # Read the CSV file into a DataFrame
        df = pd.read_csv(csv_path)
        return df

    def save_dataframe_in_text_file(self, df):
        output_file_path = 'output.txt'

        # Write the DataFrame to a tab-separated text file
        df.to_csv(output_file_path, sep='\t', index=False)
        return output_file_path

    def csv_loader(self, output_file_path):
        loader = UnstructuredFileLoader(output_file_path, strategy="fast")
        docs = loader.load()
        return docs

    def document_text_splitter(self, docs):
        """
        Split documents into chunks for efficient processing.

        Returns:
            List[str]: List of split document chunks.
        """
        # Initialize the text splitter with the specified chunk size and overlap
        text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=1000, chunk_overlap=200
        )

        # Split the documents into chunks
        split_docs = text_splitter.split_documents(docs)

        # Return the list of split document chunks
        return split_docs

    def change_bullet_points(self, text):
        # Download the sentence tokenizer data (a no-op after the first run)
        nltk.download('punkt')

        # Tokenize the passage into sentences and format each as a bullet point
        sentences = sent_tokenize(text)
        bullet_string = ""
        for sentence in sentences:
            bullet_string += "* " + sentence + "\n"
        return bullet_string
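    # Sketch of the one-year data path that one_year_summary (below) walks
    # through, step by step (illustrative; only the yfinance download needs
    # network access, and "AAPL" is an example ticker):
    #
    #   csv_path = extractor.get_finance_data("AAPL")         # writes ohlc_data.csv
    #   df = extractor.csv_to_dataframe(csv_path)
    #   txt_path = extractor.save_dataframe_in_text_file(df)  # writes output.txt
    #   docs = extractor.csv_loader(txt_path)
    #   chunks = extractor.document_text_splitter(docs)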
    def one_year_summary(self, keyword):
        try:
            # Step 1: Get the finance data and convert it to a DataFrame
            csv_path = self.get_finance_data(keyword)
            print(f"CSV path: {csv_path}")  # For debugging; ensure it's correct.
            df = self.csv_to_dataframe(csv_path)
            if df is None or df.empty:
                raise ValueError("The DataFrame is empty. Please check the CSV content.")

            # Step 2: Save the DataFrame to a text file
            output_file_path = self.save_dataframe_in_text_file(df)
            print(f"Output file saved at: {output_file_path}")

            # Step 3: Load and split the document data
            docs = self.csv_loader(output_file_path)
            if not docs:
                raise ValueError("No content was loaded from the CSV file.")
            split_docs = self.document_text_splitter(docs)
            if not split_docs:
                raise ValueError("Document splitting failed. No valid chunks were created.")

            # Step 4: Prepare the summarization prompt
            prompt_template = """Analyze the financial details and write a brief and concise summary of how the company performed:
            {text}
            CONCISE SUMMARY:"""
            prompt = PromptTemplate.from_template(prompt_template)

            # Step 5: Prepare the refine prompt for the summarization chain
            refine_template = (
                "Your job is to produce a final summary.\n"
                "We have provided an existing summary up to a certain point: {existing_answer}\n"
                "We have the opportunity to refine the existing summary "
                "(only if needed) with some more context below.\n"
                "------------\n"
                "{text}\n"
                "------------\n"
                "Given the new context, refine the original summary. "
                "If the context isn't useful, return the original summary. "
                "Ten lines of summary are enough."
            )
            refine_prompt = PromptTemplate.from_template(refine_template)

            # Step 6: Load the summarization chain with Azure ChatOpenAI
            chain = load_summarize_chain(
                llm=AzureChatOpenAI(azure_deployment="GPT-3"),
                chain_type="refine",
                question_prompt=prompt,
                refine_prompt=refine_prompt,
                return_intermediate_steps=True,
                input_key="input_documents",
                output_key="output_text",
            )

            # Step 7: Generate the summary
            result = chain({"input_documents": split_docs}, return_only_outputs=True)

            # Step 8: Process and return the summary as bullet points
            one_year_performance_summary = self.change_bullet_points(result["output_text"])

            # Log the final summary
            print(f"Generated Summary: {one_year_performance_summary}")
            return one_year_performance_summary

        except Exception as e:
            print(f"Error during one_year_summary processing: {str(e)}")
            return None

    def main(self, keyword):
        clean_url = self.get_url(keyword)
        link_summary = self.get_each_link_summary(clean_url)
        clean_summary = self.one_day_summary(link_summary)
        key_value = self.extract_key_value_pair(clean_summary)
        return clean_summary, key_value

    def company_names(self, input_text):
        # Expect input like "Company Name - TICKER"; guard against a missing "-"
        words = input_text.split("-")
        return words[1] if len(words) > 1 else input_text.strip()

    def gradio_interface(self):
        with gr.Blocks(css="style.css", theme="freddyaboulton/test-blue") as app:
            gr.HTML("""