epochs-demos Alpaca233 committed
Commit f87ab8f · 0 Parent(s)

Duplicate from Alpaca233/ChatPDF-GUI


Co-authored-by: Alpaca <Alpaca233@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ sdk: gradio
+ emoji: 🚀
+ colorFrom: red
+ colorTo: red
+ pinned: false
+ app_file: app.py
+ duplicated_from: Alpaca233/ChatPDF-GUI
+ ---
app.py ADDED
@@ -0,0 +1,51 @@
+ import gradio as gr
+
+ from gpt_reader.pdf_reader import PaperReader
+ from gpt_reader.prompt import BASE_POINTS
+
+
+ class GUI:
+     def __init__(self):
+         self.api_key = ""
+         self.session = ""
+
+     def analyse(self, api_key, pdf_file):
+         self.session = PaperReader(api_key, points_to_focus=BASE_POINTS)
+         return self.session.read_pdf_and_summarize(pdf_file)
+
+     def ask_question(self, question):
+         if self.session == "":
+             return "Please upload a PDF file first!"
+         return self.session.question(question)
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+         # CHATGPT-PAPER-READER
+         """)
+
+     with gr.Tab("Upload PDF File"):
+         pdf_input = gr.File(label="PDF File")
+         api_input = gr.Textbox(label="OpenAI API Key")
+         result = gr.Textbox(label="PDF Summary")
+         upload_button = gr.Button("Start Analysis")
+     with gr.Tab("Ask a question about your PDF"):
+         question_input = gr.Textbox(label="Your Question", placeholder="Authors of this paper?")
+         answer = gr.Textbox(label="Answer")
+         ask_button = gr.Button("Ask")
+     with gr.Accordion("About this project"):
+         gr.Markdown(
+             """## CHATGPT-PAPER-READER📝
+             This repository provides a simple interface that uses the gpt-3.5-turbo
+             model to read academic papers in PDF format locally. You can use it to help you summarize papers,
+             create presentation slides, or simply fulfill tasks assigned by your supervisor.\n
+             [Github](https://github.com/talkingwallace/ChatGPT-Paper-Reader)""")
+
+     app = GUI()
+     upload_button.click(fn=app.analyse, inputs=[api_input, pdf_input], outputs=result)
+     ask_button.click(app.ask_question, inputs=question_input, outputs=answer)
+
+ if __name__ == "__main__":
+     demo.title = "CHATGPT-PAPER-READER"
+     demo.launch()  # add share=True to share the CHATGPT-PAPER-READER app on the Internet
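
The Blocks UI above stores one PaperReader per GUI instance, so a summary must be produced before questions can be answered. Below is a minimal sketch (not part of this commit) of the same flow driven from a plain script, assuming a valid OpenAI key and a local my_paper.pdf — both placeholders.

# sketch: exercise the GUI wiring without launching Gradio (placeholder key and PDF path)
from app import GUI

gui = GUI()
print(gui.ask_question("Authors of this paper?"))    # "Please upload a PDF file first!" until analyse() has run
summary = gui.analyse("sk-your-key", "my_paper.pdf")  # builds a PaperReader session and summarizes the PDF
print(summary)
print(gui.ask_question("What dataset did this paper use?"))  # follow-up question against the stored session
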
gpt_reader/__init__.py ADDED
File without changes
gpt_reader/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (148 Bytes)
gpt_reader/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (148 Bytes)
gpt_reader/__pycache__/model_interface.cpython-38.pyc ADDED
Binary file (1.36 kB)
gpt_reader/__pycache__/model_interface.cpython-39.pyc ADDED
Binary file (1.36 kB)
gpt_reader/__pycache__/paper.cpython-38.pyc ADDED
Binary file (961 Bytes)
gpt_reader/__pycache__/paper.cpython-39.pyc ADDED
Binary file (961 Bytes)
gpt_reader/__pycache__/pdf_reader.cpython-38.pyc ADDED
Binary file (3.37 kB)
gpt_reader/__pycache__/pdf_reader.cpython-39.pyc ADDED
Binary file (3.37 kB)
gpt_reader/__pycache__/prompt.cpython-38.pyc ADDED
Binary file (1.28 kB)
gpt_reader/__pycache__/prompt.cpython-39.pyc ADDED
Binary file (1.28 kB)
gpt_reader/model_interface.py ADDED
@@ -0,0 +1,32 @@
+ from typing import List
+ import openai
+
+
+ class ModelInterface(object):
+
+     def __init__(self) -> None:
+         pass
+
+     def send_msg(self, *args):
+         pass
+
+
+ class OpenAIModel(object):
+
+     def __init__(self, api_key, model='gpt-3.5-turbo', temperature=0.2) -> None:
+         openai.api_key = api_key
+         self.model = model
+         self.temperature = temperature
+
+     def send_msg(self, msg: List[dict], return_raw_text=True):
+
+         response = openai.ChatCompletion.create(
+             model=self.model,
+             messages=msg,
+             temperature=self.temperature
+         )
+
+         if return_raw_text:
+             return response["choices"][0]["message"]["content"]
+         else:
+             return response
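
OpenAIModel wraps a single openai.ChatCompletion.create call: send_msg takes the usual list of role/content messages and by default returns only the assistant's text. A short usage sketch (placeholder key, requires network access; not part of this commit):

# sketch: one-off call through the wrapper above
from gpt_reader.model_interface import OpenAIModel

model = OpenAIModel(api_key="sk-your-key")  # defaults: model='gpt-3.5-turbo', temperature=0.2
messages = [
    {"role": "system", "content": "You are a researcher helper bot."},
    {"role": "user", "content": "In one sentence, what is an ablation study?"},
]
print(model.send_msg(messages))                         # assistant text only
raw = model.send_msg(messages, return_raw_text=False)   # full API response object instead
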
gpt_reader/paper.py ADDED
@@ -0,0 +1,20 @@
+ from PyPDF2 import PdfReader
+
+ class Paper(object):
+
+     def __init__(self, pdf_obj: PdfReader) -> None:
+         self._pdf_obj = pdf_obj
+         self._paper_meta = self._pdf_obj.metadata
+
+     def iter_pages(self, iter_text_len: int = 3000):
+         page_idx = 0
+         for page in self._pdf_obj.pages:
+             txt = page.extract_text()
+             for i in range((len(txt) // iter_text_len) + 1):
+                 yield page_idx, i, txt[i * iter_text_len:(i + 1) * iter_text_len]
+             page_idx += 1
+
+
+ if __name__ == '__main__':
+     reader = PdfReader('../alexnet.pdf')
+     paper = Paper(reader)
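
iter_pages yields (page_index, part_index, text) tuples, cutting each page's extracted text into chunks of at most iter_text_len characters; a short page still yields a single part. A quick inspection sketch, assuming some local example.pdf (placeholder, not part of this commit):

# sketch: inspect how a PDF is chunked before it is sent to the model
from PyPDF2 import PdfReader
from gpt_reader.paper import Paper

paper = Paper(PdfReader("example.pdf"))
for page_idx, part_idx, text in paper.iter_pages(iter_text_len=3000):
    print(f"page {page_idx}, part {part_idx}: {len(text)} chars")
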
gpt_reader/pdf_reader.py ADDED
@@ -0,0 +1,121 @@
+ from PyPDF2 import PdfReader
+ import openai
+ from .prompt import BASE_POINTS, READING_PROMT_V2
+ from .paper import Paper
+ from .model_interface import OpenAIModel
+
+
+ class PaperReader:
+
+     """
+     A class for summarizing research papers using the OpenAI API.
+
+     Attributes:
+         openai_key (str): The API key used to access the OpenAI API.
+         token_length (int): The length of text to send to the API at a time.
+         model (str): The GPT model to use for summarization.
+         points_to_focus (str): The key points to focus on while summarizing.
+         verbose (bool): A flag to enable/disable verbose logging.
+
+     """
+
+     def __init__(self, openai_key, token_length=4000, model="gpt-3.5-turbo",
+                  points_to_focus=BASE_POINTS, verbose=False):
+
+         # Setting the API key to use the OpenAI API
+         openai.api_key = openai_key
+
+         # Initializing prompts for the conversation
+         self.init_prompt = READING_PROMT_V2.format(points_to_focus)
+
+         self.summary_prompt = 'You are a researcher helper bot. Now you need to read the summaries of a research paper.'
+         self.messages = []  # Initializing the conversation messages
+         self.summary_msg = []  # Initializing the summary messages
+         self.token_len = token_length  # Setting the token length to use
+         self.keep_round = 2  # Rounds of previous dialogue to keep in the conversation
+         self.model = model  # Setting the GPT model to use
+         self.verbose = verbose  # Flag to enable/disable verbose logging
+         self.model = OpenAIModel(api_key=openai_key, model=model)
+
+     def drop_conversation(self, msg):
+         # This method drops previous messages from the conversation and keeps only recent ones
+         if len(msg) >= (self.keep_round + 1) * 2 + 1:
+             new_msg = [msg[0]]
+             for i in range(3, len(msg)):
+                 new_msg.append(msg[i])
+             return new_msg
+         else:
+             return msg
+
+     def send_msg(self, msg):
+         return self.model.send_msg(msg)
+
+     def _chat(self, message):
+         # This method sends a message and gets a response from the OpenAI API
+
+         # Adding the user message to the conversation messages
+         self.messages.append({"role": "user", "content": message})
+         # Sending the messages to the API and getting the response
+         response = self.send_msg(self.messages)
+         # Adding the system response to the conversation messages
+         self.messages.append({"role": "system", "content": response})
+         # Dropping previous conversation messages to keep the conversation history short
+         self.messages = self.drop_conversation(self.messages)
+         # Returning the system response
+         return response
+
+     def summarize(self, paper: Paper):
+         # This method summarizes a given research paper
+
+         # Adding the initial prompt to the conversation messages
+         self.messages = [
+             {"role": "system", "content": self.init_prompt},
+         ]
+         # Adding the summary prompt to the summary messages
+         self.summary_msg = [{"role": "system", "content": self.summary_prompt}]
+
+         # Reading and summarizing each part of the research paper
+         for (page_idx, part_idx, text) in paper.iter_pages():
+             print('page: {}, part: {}'.format(page_idx, part_idx))
+             # Sending the text to the API and getting the response
+             summary = self._chat('now I send you page {}, part {}:{}'.format(page_idx, part_idx, text))
+             # Logging the summary if verbose logging is enabled
+             if self.verbose:
+                 print(summary)
+             # Adding the summary of the part to the summary messages
+             self.summary_msg.append({"role": "user", "content": '{}'.format(summary)})
+
+         # Adding a prompt asking for a summary of the whole paper to the summary messages
+         self.summary_msg.append({"role": "user", "content": 'Now please make a summary of the whole paper'})
+         # Sending the summary messages to the API and getting the response
+         result = self.send_msg(self.summary_msg)
+         # Returning the summary of the whole paper
+         return result
+
+     def read_pdf_and_summarize(self, pdf_path):
+         # This method reads a research paper from a PDF file and summarizes it
+
+         # Creating a PdfReader object to read the PDF file
+         pdf_reader = PdfReader(pdf_path)
+         paper = Paper(pdf_reader)
+         # Summarizing the full text of the research paper and returning the summary
+         print('reading pdf finished')
+         summary = self.summarize(paper)
+         return summary
+
+     def get_summary_of_each_part(self):
+         # This method returns the summary of each part of the research paper
+         return self.summary_msg
+
+     def question(self, question):
+         # This method asks a question after a paper has been summarized
+
+         # Adding the question to the summary messages
+         self.summary_msg.append({"role": "user", "content": question})
+         # Sending the summary messages to the API and getting the response
+         response = self.send_msg(self.summary_msg)
+         # Adding the system response to the summary messages
+         self.summary_msg.append({"role": "system", "content": response})
+         # Returning the system response
+         return response
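
drop_conversation is what keeps the rolling chat history bounded: once the message list reaches (keep_round + 1) * 2 + 1 entries (7 with the default keep_round=2), it keeps the first system prompt and discards the oldest user/assistant exchange. A self-contained sketch of just that pruning (placeholder key; constructing PaperReader performs no API call; not part of this commit):

# sketch: how the conversation history is pruned
from gpt_reader.pdf_reader import PaperReader

reader = PaperReader(openai_key="sk-placeholder")
history = [{"role": "system", "content": "init prompt"}]
for i in range(3):  # three user/assistant exchanges
    history.append({"role": "user", "content": f"page {i}"})
    history.append({"role": "system", "content": f"summary {i}"})
print(len(history))                 # 7 messages
pruned = reader.drop_conversation(history)
print(len(pruned))                  # 5: the system prompt plus the last two exchanges
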
gpt_reader/prompt.py ADDED
@@ -0,0 +1,26 @@
+ BASE_POINTS = """
+ 1. Who are the authors?
+ 2. What is the process of the proposed method?
+ 3. What is the performance of the proposed method? Please note down its performance metrics.
+ 4. What are the baseline models and their performances? Please note down these baseline methods.
+ 5. What dataset did this paper use?
+ """
+
+ READING_PROMPT = """
+ You are a researcher helper bot. You can help the user with research paper reading and summarizing. \n
+ Now I am going to send you a paper. You need to read it and summarize it for me part by part. \n
+ When you are reading, you need to focus on these key points:{}
+ """
+
+ READING_PROMT_V2 = """
+ You are a researcher helper bot. You can help the user with research paper reading and summarizing. \n
+ Now I am going to send you a paper. You need to read it and summarize it for me part by part. \n
+ When you are reading, you need to focus on these key points:{}
+
+ And you need to generate a brief but informative title for this part.
+ Your return format:
+ - title: '...'
+ - summary: '...'
+ """
+
+ SUMMARY_PROMPT = "You are a researcher helper bot. Now you need to read the summaries of a research paper."
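
PaperReader builds its system prompt by calling READING_PROMT_V2.format(points_to_focus), with BASE_POINTS as the default, so the exact instructions sent to the model can be previewed (or tweaked) before any paper is read. A one-line sketch:

# sketch: preview the system prompt PaperReader sends before any paper text
from gpt_reader.prompt import BASE_POINTS, READING_PROMT_V2

print(READING_PROMT_V2.format(BASE_POINTS))
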
requirements.txt ADDED
@@ -0,0 +1,5 @@
+
+ gradio==3.15.0
+ openai==0.27.1
+ PyPDF2==3.0.1
+