Spaces:
Runtime error
Runtime error
Commit
·
f87ab8f
0
Parent(s):
Duplicate from Alpaca233/ChatPDF-GUI
Browse files
Co-authored-by: Alpaca <Alpaca233@users.noreply.huggingface.co>
- .gitattributes +34 -0
- README.md +9 -0
- app.py +51 -0
- gpt_reader/__init__.py +0 -0
- gpt_reader/__pycache__/__init__.cpython-38.pyc +0 -0
- gpt_reader/__pycache__/__init__.cpython-39.pyc +0 -0
- gpt_reader/__pycache__/model_interface.cpython-38.pyc +0 -0
- gpt_reader/__pycache__/model_interface.cpython-39.pyc +0 -0
- gpt_reader/__pycache__/paper.cpython-38.pyc +0 -0
- gpt_reader/__pycache__/paper.cpython-39.pyc +0 -0
- gpt_reader/__pycache__/pdf_reader.cpython-38.pyc +0 -0
- gpt_reader/__pycache__/pdf_reader.cpython-39.pyc +0 -0
- gpt_reader/__pycache__/prompt.cpython-38.pyc +0 -0
- gpt_reader/__pycache__/prompt.cpython-39.pyc +0 -0
- gpt_reader/model_interface.py +32 -0
- gpt_reader/paper.py +20 -0
- gpt_reader/pdf_reader.py +121 -0
- gpt_reader/prompt.py +26 -0
- requirements.txt +5 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
sdk: gradio
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: red
|
6 |
+
pinned: false
|
7 |
+
app_file: app.py
|
8 |
+
duplicated_from: Alpaca233/ChatPDF-GUI
|
9 |
+
---
|
app.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from gpt_reader.pdf_reader import PaperReader
|
4 |
+
from gpt_reader.prompt import BASE_POINTS
|
5 |
+
|
6 |
+
|
7 |
+
class GUI:
    """Holds the UI state: the API key and the active PaperReader session."""

    def __init__(self):
        self.api_key = ""
        # No active reading session until a PDF has been analysed. The empty
        # string (rather than None) is kept for backward compatibility with
        # any caller that compares against "".
        self.session = ""

    def analyse(self, api_key, pdf_file):
        """Create a fresh PaperReader session and summarize the given PDF.

        Returns the summary text produced by the model.
        """
        self.session = PaperReader(api_key, points_to_focus=BASE_POINTS)
        return self.session.read_pdf_and_summarize(pdf_file)

    def ask_question(self, question):
        """Answer a follow-up question; requires a prior analyse() call."""
        # Falsy check is more robust than `== ""`: it also covers None or any
        # other "no session yet" sentinel.
        if not self.session:
            return "Please upload PDF file first!"
        return self.session.question(question)
|
20 |
+
|
21 |
+
|
22 |
+
with gr.Blocks() as demo:
    # Page header shown at the top of the app.
    gr.Markdown(
        """
    # CHATGPT-PAPER-READER
    """)

    # Tab 1: upload a PDF plus the OpenAI API key, then run the summarizer.
    with gr.Tab("Upload PDF File"):
        pdf_input = gr.File(label="PDF File")
        api_input = gr.Textbox(label="OpenAI API Key")
        result = gr.Textbox(label="PDF Summary")
        upload_button = gr.Button("Start Analyse")
    # Tab 2: follow-up Q&A against the already-summarized paper.
    with gr.Tab("Ask question about your PDF"):
        question_input = gr.Textbox(label="Your Question", placeholder="Authors of this paper?")
        answer = gr.Textbox(label="Answer")
        ask_button = gr.Button("Ask")
    # Static project description.
    with gr.Accordion("About this project"):
        gr.Markdown(
            """## CHATGPT-PAPER-READER📝
            This repository provides a simple interface that utilizes the gpt-3.5-turbo
            model to read academic papers in PDF format locally. You can use it to help you summarize papers,
            create presentation slides, or simply fulfill tasks assigned by your supervisor.\n
            [Github](https://github.com/talkingwallace/ChatGPT-Paper-Reader)""")

    # Shared UI state; wire the buttons to the GUI methods defined above.
    app = GUI()
    upload_button.click(fn=app.analyse, inputs=[api_input, pdf_input], outputs=result)
    ask_button.click(app.ask_question, inputs=question_input, outputs=answer)

if __name__ == "__main__":
    demo.title = "CHATGPT-PAPER-READER"
    demo.launch()  # add "share=True" to share CHATGPT-PAPER-READER app on Internet.
|
gpt_reader/__init__.py
ADDED
File without changes
|
gpt_reader/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (148 Bytes). View file
|
|
gpt_reader/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (148 Bytes). View file
|
|
gpt_reader/__pycache__/model_interface.cpython-38.pyc
ADDED
Binary file (1.36 kB). View file
|
|
gpt_reader/__pycache__/model_interface.cpython-39.pyc
ADDED
Binary file (1.36 kB). View file
|
|
gpt_reader/__pycache__/paper.cpython-38.pyc
ADDED
Binary file (961 Bytes). View file
|
|
gpt_reader/__pycache__/paper.cpython-39.pyc
ADDED
Binary file (961 Bytes). View file
|
|
gpt_reader/__pycache__/pdf_reader.cpython-38.pyc
ADDED
Binary file (3.37 kB). View file
|
|
gpt_reader/__pycache__/pdf_reader.cpython-39.pyc
ADDED
Binary file (3.37 kB). View file
|
|
gpt_reader/__pycache__/prompt.cpython-38.pyc
ADDED
Binary file (1.28 kB). View file
|
|
gpt_reader/__pycache__/prompt.cpython-39.pyc
ADDED
Binary file (1.28 kB). View file
|
|
gpt_reader/model_interface.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
import openai
|
3 |
+
|
4 |
+
|
5 |
+
class ModelInterface(object):
    """Abstract interface for chat-model backends.

    Concrete backends (e.g. OpenAIModel) must implement send_msg().
    """

    def __init__(self) -> None:
        pass

    def send_msg(self, *args):
        """Send a message to the model and return its reply.

        Raises:
            NotImplementedError: always on the base class. The original
                silently returned None, which hides an unimplemented backend;
                failing loudly makes the bug obvious at the call site.
        """
        raise NotImplementedError("send_msg() must be implemented by a subclass")
|
12 |
+
|
13 |
+
|
14 |
+
class OpenAIModel(object):
    """Chat backend that calls the OpenAI ChatCompletion API."""

    def __init__(self, api_key, model='gpt-3.5-turbo', temperature=0.2) -> None:
        # The legacy openai SDK (< 1.0) stores the key as module-level state.
        openai.api_key = api_key
        self.model = model
        self.temperature = temperature

    def send_msg(self, msg: List[dict], return_raw_text=True):
        """Send a message list; return the reply text or the raw response."""
        reply = openai.ChatCompletion.create(
            model=self.model,
            messages=msg,
            temperature=self.temperature,
        )
        if not return_raw_text:
            return reply
        # Default path: unwrap the first choice's message content.
        return reply["choices"][0]["message"]["content"]
|
gpt_reader/paper.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PyPDF2 import PdfReader
|
2 |
+
|
3 |
+
class Paper(object):
    """Wraps a PyPDF2 reader and yields its text in fixed-size chunks."""

    def __init__(self, pdf_obj: "PdfReader") -> None:
        # The annotation is a string so the class is definable even when
        # PyPDF2 is not importable at annotation-evaluation time.
        self._pdf_obj = pdf_obj
        self._paper_meta = self._pdf_obj.metadata

    def iter_pages(self, iter_text_len: int = 3000):
        """Yield (page_index, part_index, text_chunk) triples.

        Each page's extracted text is split into chunks of at most
        ``iter_text_len`` characters. A page always yields at least one
        (possibly empty) chunk, matching the original behaviour.
        """
        # enumerate() replaces the original manual page_idx counter.
        for page_idx, page in enumerate(self._pdf_obj.pages):
            txt = page.extract_text()
            for part_idx in range((len(txt) // iter_text_len) + 1):
                yield page_idx, part_idx, txt[part_idx * iter_text_len:(part_idx + 1) * iter_text_len]
|
16 |
+
|
17 |
+
|
18 |
+
if __name__ == '__main__':
    # Manual smoke test: requires a sample PDF at this relative path.
    reader = PdfReader('../alexnet.pdf')
    paper = Paper(reader)
|
gpt_reader/pdf_reader.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PyPDF2 import PdfReader
|
2 |
+
import openai
|
3 |
+
from .prompt import BASE_POINTS, READING_PROMT_V2
|
4 |
+
from .paper import Paper
|
5 |
+
from .model_interface import OpenAIModel
|
6 |
+
|
7 |
+
|
8 |
+
# Setting the API key to use the OpenAI API
|
9 |
+
class PaperReader:

    """
    A class for summarizing research papers using the OpenAI API.

    Attributes:
        init_prompt (str): System prompt for the page-by-page reading phase.
        summary_prompt (str): System prompt for the final summarization phase.
        token_len (int): The length of text to send to the API at a time.
        model (OpenAIModel): The chat-model backend used for all requests.
        keep_round (int): Rounds of previous dialogue to keep in conversation.
        verbose (bool): A flag to enable/disable verbose logging.
    """

    def __init__(self, openai_key, token_length=4000, model="gpt-3.5-turbo",
                 points_to_focus=BASE_POINTS, verbose=False):

        # Setting the API key to use the OpenAI API
        openai.api_key = openai_key

        # Initializing prompts for the conversation
        self.init_prompt = READING_PROMT_V2.format(points_to_focus)

        self.summary_prompt = 'You are a researcher helper bot. Now you need to read the summaries of a research paper.'
        self.messages = []  # Conversation messages for the reading phase
        self.summary_msg = []  # Accumulated per-part summaries
        self.token_len = token_length  # Length of text to send per request
        self.keep_round = 2  # Rounds of previous dialogues to keep in conversation
        self.verbose = verbose  # Flag to enable/disable verbose logging
        # Fix: the original first stored the model *name* string in self.model
        # and then immediately overwrote it with the backend object. The dead
        # assignment is removed; self.model is always the OpenAIModel backend.
        self.model = OpenAIModel(api_key=openai_key, model=model)

    def drop_conversation(self, msg):
        """Trim old dialogue rounds, always keeping the system prompt (msg[0]).

        When the history reaches (keep_round + 1) * 2 + 1 entries, the oldest
        user/response pair after the system prompt is dropped.
        """
        if len(msg) >= (self.keep_round + 1) * 2 + 1:
            # Keep the system prompt, drop the oldest user/response pair.
            return [msg[0]] + list(msg[3:])
        else:
            return msg

    def send_msg(self, msg):
        """Forward a message list to the model backend and return its reply."""
        return self.model.send_msg(msg)

    def _chat(self, message):
        """Send one user message, record the reply, and trim the history."""
        # Adding the user message to the conversation messages
        self.messages.append({"role": "user", "content": message})
        # Sending the messages to the API and getting the response
        response = self.send_msg(self.messages)
        # NOTE(review): replies are recorded with role "system" rather than
        # "assistant"; preserved from the original implementation.
        self.messages.append({"role": "system", "content": response})
        # Keep the conversation history short to stay within token limits.
        self.messages = self.drop_conversation(self.messages)
        return response

    def summarize(self, paper: Paper):
        """Summarize a paper part by part, then produce a whole-paper summary."""
        # Reset both conversations for a fresh paper.
        self.messages = [
            {"role": "system", "content": self.init_prompt},
        ]
        self.summary_msg = [{"role": "system", "content": self.summary_prompt}]

        # Reading and summarizing each part of the research paper
        for (page_idx, part_idx, text) in paper.iter_pages():
            print('page: {}, part: {}'.format(page_idx, part_idx))
            # Sending the text to the API and getting the response
            summary = self._chat('now I send you page {}, part {}:{}'.format(page_idx, part_idx, text))
            if self.verbose:
                print(summary)
            # Collect the per-part summary for the final summarization pass.
            self.summary_msg.append({"role": "user", "content": '{}'.format(summary)})

        # Ask for a summary of the whole paper based on the part summaries.
        self.summary_msg.append({"role": "user", "content": 'Now please make a summary of the whole paper'})
        result = self.send_msg(self.summary_msg)
        return result

    def read_pdf_and_summarize(self, pdf_path):
        """Read a research paper from a PDF file (or path) and summarize it."""
        # Creating a PdfReader object to read the PDF file
        pdf_reader = PdfReader(pdf_path)
        paper = Paper(pdf_reader)
        print('reading pdf finished')
        # Summarizing the full text of the research paper
        summary = self.summarize(paper)
        return summary

    def get_summary_of_each_part(self):
        """Return the accumulated summary messages (one entry per paper part)."""
        return self.summary_msg

    def question(self, question):
        """Ask a follow-up question grounded in the collected summaries.

        Must be called after a paper has been summarized.
        """
        # Adding the question to the summary messages
        self.summary_msg.append({"role": "user", "content": question})
        # Sending the summary messages to the API and getting the response
        response = self.send_msg(self.summary_msg)
        # Record the reply so later questions see the full Q&A history.
        self.summary_msg.append({"role": "system", "content": response})
        return response
|
gpt_reader/prompt.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default list of questions the reader should answer while summarizing.
BASE_POINTS = """
1. Who are the authors?
2. What is the process of the proposed method?
3. What is the performance of the proposed method? Please note down its performance metrics.
4. What are the baseline models and their performances? Please note down these baseline methods.
5. What dataset did this paper use?
"""

# First-generation reading prompt (kept for reference; pdf_reader uses V2).
READING_PROMPT = """
You are a researcher helper bot. You can help the user with research paper reading and summarizing. \n
Now I am going to send you a paper. You need to read it and summarize it for me part by part. \n
When you are reading, You need to focus on these key points:{}
"""

# Current reading prompt; additionally asks for a short title per part.
# NOTE: the misspelled name ("PROMT") is kept because other modules import it.
READING_PROMT_V2 = """
You are a researcher helper bot. You can help the user with research paper reading and summarizing. \n
Now I am going to send you a paper. You need to read it and summarize it for me part by part. \n
When you are reading, You need to focus on these key points:{},

And You need to generate a brief but informative title for this part.
Your return format:
- title: '...'
- summary: '...'
"""

# System prompt for the final whole-paper summarization phase.
SUMMARY_PROMPT = "You are a researcher helper bot. Now you need to read the summaries of a research paper."
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
gradio==3.15.0
|
3 |
+
openai==0.27.1
|
4 |
+
PyPDF2==3.0.1
|
5 |
+
|