epochs-demos Alpaca233 committed
Commit f87ab8f · 0 Parent(s)

Duplicate from Alpaca233/ChatPDF-GUI


Co-authored-by: Alpaca <Alpaca233@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ sdk: gradio
+ emoji: 🚀
+ colorFrom: red
+ colorTo: red
+ pinned: false
+ app_file: app.py
+ duplicated_from: Alpaca233/ChatPDF-GUI
+ ---
app.py ADDED
@@ -0,0 +1,51 @@
+ import gradio as gr
+
+ from gpt_reader.pdf_reader import PaperReader
+ from gpt_reader.prompt import BASE_POINTS
+
+
+ class GUI:
+     def __init__(self):
+         self.api_key = ""
+         self.session = ""
+
+     def analyse(self, api_key, pdf_file):
+         self.session = PaperReader(api_key, points_to_focus=BASE_POINTS)
+         return self.session.read_pdf_and_summarize(pdf_file)
+
+     def ask_question(self, question):
+         if self.session == "":
+             return "Please upload a PDF file first!"
+         return self.session.question(question)
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+         # CHATGPT-PAPER-READER
+         """)
+
+     with gr.Tab("Upload PDF File"):
+         pdf_input = gr.File(label="PDF File")
+         api_input = gr.Textbox(label="OpenAI API Key")
+         result = gr.Textbox(label="PDF Summary")
+         upload_button = gr.Button("Start Analysis")
+     with gr.Tab("Ask a question about your PDF"):
+         question_input = gr.Textbox(label="Your Question", placeholder="Authors of this paper?")
+         answer = gr.Textbox(label="Answer")
+         ask_button = gr.Button("Ask")
+     with gr.Accordion("About this project"):
+         gr.Markdown(
+             """## CHATGPT-PAPER-READER📝
+             This repository provides a simple interface that uses the gpt-3.5-turbo
+             model to read academic papers in PDF format locally. You can use it to help you summarize papers,
+             create presentation slides, or simply fulfill tasks assigned by your supervisor.\n
+             [Github](https://github.com/talkingwallace/ChatGPT-Paper-Reader)""")
+
+     app = GUI()
+     upload_button.click(fn=app.analyse, inputs=[api_input, pdf_input], outputs=result)
+     ask_button.click(app.ask_question, inputs=question_input, outputs=answer)
+
+ if __name__ == "__main__":
+     demo.title = "CHATGPT-PAPER-READER"
+     demo.launch()  # add share=True to share the CHATGPT-PAPER-READER app on the Internet
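
The Blocks UI above stores one PaperReader per GUI instance, so a summary must be produced before questions can be answered. Below is a minimal sketch (not part of this commit) of the same flow driven from a plain script, assuming a valid OpenAI key and a local my_paper.pdf — both placeholders.

# sketch: exercise the GUI wiring without launching Gradio (placeholder key and PDF path)
from app import GUI

gui = GUI()
print(gui.ask_question("Authors of this paper?"))    # "Please upload a PDF file first!" until analyse() has run
summary = gui.analyse("sk-your-key", "my_paper.pdf")  # builds a PaperReader session and summarizes the PDF
print(summary)
print(gui.ask_question("What dataset did this paper use?"))  # follow-up question against the stored session
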
gpt_reader/__init__.py ADDED
File without changes
gpt_reader/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (148 Bytes)
gpt_reader/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (148 Bytes)
gpt_reader/__pycache__/model_interface.cpython-38.pyc ADDED
Binary file (1.36 kB)
gpt_reader/__pycache__/model_interface.cpython-39.pyc ADDED
Binary file (1.36 kB)
gpt_reader/__pycache__/paper.cpython-38.pyc ADDED
Binary file (961 Bytes)
gpt_reader/__pycache__/paper.cpython-39.pyc ADDED
Binary file (961 Bytes)
gpt_reader/__pycache__/pdf_reader.cpython-38.pyc ADDED
Binary file (3.37 kB)
gpt_reader/__pycache__/pdf_reader.cpython-39.pyc ADDED
Binary file (3.37 kB)
gpt_reader/__pycache__/prompt.cpython-38.pyc ADDED
Binary file (1.28 kB)
gpt_reader/__pycache__/prompt.cpython-39.pyc ADDED
Binary file (1.28 kB)
gpt_reader/model_interface.py ADDED
@@ -0,0 +1,32 @@
+ from typing import List
+ import openai
+
+
+ class ModelInterface(object):
+
+     def __init__(self) -> None:
+         pass
+
+     def send_msg(self, *args):
+         pass
+
+
+ class OpenAIModel(object):
+
+     def __init__(self, api_key, model='gpt-3.5-turbo', temperature=0.2) -> None:
+         openai.api_key = api_key
+         self.model = model
+         self.temperature = temperature
+
+     def send_msg(self, msg: List[dict], return_raw_text=True):
+
+         response = openai.ChatCompletion.create(
+             model=self.model,
+             messages=msg,
+             temperature=self.temperature
+         )
+
+         if return_raw_text:
+             return response["choices"][0]["message"]["content"]
+         else:
+             return response
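
OpenAIModel wraps a single openai.ChatCompletion.create call: send_msg takes the usual list of role/content messages and by default returns only the assistant's text. A short usage sketch (placeholder key, requires network access; not part of this commit):

# sketch: one-off call through the wrapper above
from gpt_reader.model_interface import OpenAIModel

model = OpenAIModel(api_key="sk-your-key")  # defaults: model='gpt-3.5-turbo', temperature=0.2
messages = [
    {"role": "system", "content": "You are a researcher helper bot."},
    {"role": "user", "content": "In one sentence, what is an ablation study?"},
]
print(model.send_msg(messages))                         # assistant text only
raw = model.send_msg(messages, return_raw_text=False)   # full API response object instead
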
gpt_reader/paper.py ADDED
@@ -0,0 +1,20 @@
+ from PyPDF2 import PdfReader
+
+ class Paper(object):
+
+     def __init__(self, pdf_obj: PdfReader) -> None:
+         self._pdf_obj = pdf_obj
+         self._paper_meta = self._pdf_obj.metadata
+
+     def iter_pages(self, iter_text_len: int = 3000):
+         page_idx = 0
+         for page in self._pdf_obj.pages:
+             txt = page.extract_text()
+             for i in range((len(txt) // iter_text_len) + 1):
+                 yield page_idx, i, txt[i * iter_text_len:(i + 1) * iter_text_len]
+             page_idx += 1
+
+
+ if __name__ == '__main__':
+     reader = PdfReader('../alexnet.pdf')
+     paper = Paper(reader)
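
iter_pages yields (page_index, part_index, text) tuples, cutting each page's extracted text into chunks of at most iter_text_len characters; a short page still yields a single part. A quick inspection sketch, assuming some local example.pdf (placeholder, not part of this commit):

# sketch: inspect how a PDF is chunked before it is sent to the model
from PyPDF2 import PdfReader
from gpt_reader.paper import Paper

paper = Paper(PdfReader("example.pdf"))
for page_idx, part_idx, text in paper.iter_pages(iter_text_len=3000):
    print(f"page {page_idx}, part {part_idx}: {len(text)} chars")
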
gpt_reader/pdf_reader.py ADDED
@@ -0,0 +1,121 @@
+ from PyPDF2 import PdfReader
+ import openai
+ from .prompt import BASE_POINTS, READING_PROMT_V2
+ from .paper import Paper
+ from .model_interface import OpenAIModel
+
+
+ class PaperReader:
+
+     """
+     A class for summarizing research papers using the OpenAI API.
+
+     Attributes:
+         openai_key (str): The API key used to access the OpenAI API.
+         token_length (int): The length of text to send to the API at a time.
+         model (str): The GPT model to use for summarization.
+         points_to_focus (str): The key points to focus on while summarizing.
+         verbose (bool): A flag to enable/disable verbose logging.
+
+     """
+
+     def __init__(self, openai_key, token_length=4000, model="gpt-3.5-turbo",
+                  points_to_focus=BASE_POINTS, verbose=False):
+
+         # Setting the API key to use the OpenAI API
+         openai.api_key = openai_key
+
+         # Initializing prompts for the conversation
+         self.init_prompt = READING_PROMT_V2.format(points_to_focus)
+
+         self.summary_prompt = 'You are a researcher helper bot. Now you need to read the summaries of a research paper.'
+         self.messages = []  # Initializing the conversation messages
+         self.summary_msg = []  # Initializing the summary messages
+         self.token_len = token_length  # Setting the token length to use
+         self.keep_round = 2  # Rounds of previous dialogue to keep in the conversation
+         self.model = model  # Setting the GPT model to use
+         self.verbose = verbose  # Flag to enable/disable verbose logging
+         self.model = OpenAIModel(api_key=openai_key, model=model)
+
+     def drop_conversation(self, msg):
+         # This method drops previous messages from the conversation and keeps only recent ones
+         if len(msg) >= (self.keep_round + 1) * 2 + 1:
+             new_msg = [msg[0]]
+             for i in range(3, len(msg)):
+                 new_msg.append(msg[i])
+             return new_msg
+         else:
+             return msg
+
+     def send_msg(self, msg):
+         return self.model.send_msg(msg)
+
+     def _chat(self, message):
+         # This method sends a message and gets a response from the OpenAI API
+
+         # Adding the user message to the conversation messages
+         self.messages.append({"role": "user", "content": message})
+         # Sending the messages to the API and getting the response
+         response = self.send_msg(self.messages)
+         # Adding the system response to the conversation messages
+         self.messages.append({"role": "system", "content": response})
+         # Dropping previous conversation messages to keep the conversation history short
+         self.messages = self.drop_conversation(self.messages)
+         # Returning the system response
+         return response
+
+     def summarize(self, paper: Paper):
+         # This method summarizes a given research paper
+
+         # Adding the initial prompt to the conversation messages
+         self.messages = [
+             {"role": "system", "content": self.init_prompt},
+         ]
+         # Adding the summary prompt to the summary messages
+         self.summary_msg = [{"role": "system", "content": self.summary_prompt}]
+
+         # Reading and summarizing each part of the research paper
+         for (page_idx, part_idx, text) in paper.iter_pages():
+             print('page: {}, part: {}'.format(page_idx, part_idx))
+             # Sending the text to the API and getting the response
+             summary = self._chat('now I send you page {}, part {}:{}'.format(page_idx, part_idx, text))
+             # Logging the summary if verbose logging is enabled
+             if self.verbose:
+                 print(summary)
+             # Adding the summary of the part to the summary messages
+             self.summary_msg.append({"role": "user", "content": '{}'.format(summary)})
+
+         # Adding a prompt asking for a summary of the whole paper to the summary messages
+         self.summary_msg.append({"role": "user", "content": 'Now please make a summary of the whole paper'})
+         # Sending the summary messages to the API and getting the response
+         result = self.send_msg(self.summary_msg)
+         # Returning the summary of the whole paper
+         return result
+
+     def read_pdf_and_summarize(self, pdf_path):
+         # This method reads a research paper from a PDF file and summarizes it
+
+         # Creating a PdfReader object to read the PDF file
+         pdf_reader = PdfReader(pdf_path)
+         paper = Paper(pdf_reader)
+         # Summarizing the full text of the research paper and returning the summary
+         print('reading pdf finished')
+         summary = self.summarize(paper)
+         return summary
+
+     def get_summary_of_each_part(self):
+         # This method returns the summary of each part of the research paper
+         return self.summary_msg
+
+     def question(self, question):
+         # This method asks a question after a paper has been summarized
+
+         # Adding the question to the summary messages
+         self.summary_msg.append({"role": "user", "content": question})
+         # Sending the summary messages to the API and getting the response
+         response = self.send_msg(self.summary_msg)
+         # Adding the system response to the summary messages
+         self.summary_msg.append({"role": "system", "content": response})
+         # Returning the system response
+         return response
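
drop_conversation is what keeps the rolling chat history bounded: once the message list reaches (keep_round + 1) * 2 + 1 entries (7 with the default keep_round=2), it keeps the first system prompt and discards the oldest user/assistant exchange. A self-contained sketch of just that pruning (placeholder key; constructing PaperReader performs no API call; not part of this commit):

# sketch: how the conversation history is pruned
from gpt_reader.pdf_reader import PaperReader

reader = PaperReader(openai_key="sk-placeholder")
history = [{"role": "system", "content": "init prompt"}]
for i in range(3):  # three user/assistant exchanges
    history.append({"role": "user", "content": f"page {i}"})
    history.append({"role": "system", "content": f"summary {i}"})
print(len(history))                 # 7 messages
pruned = reader.drop_conversation(history)
print(len(pruned))                  # 5: the system prompt plus the last two exchanges
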
gpt_reader/prompt.py ADDED
@@ -0,0 +1,26 @@
+ BASE_POINTS = """
+ 1. Who are the authors?
+ 2. What is the process of the proposed method?
+ 3. What is the performance of the proposed method? Please note down its performance metrics.
+ 4. What are the baseline models and their performances? Please note down these baseline methods.
+ 5. What dataset did this paper use?
+ """
+
+ READING_PROMPT = """
+ You are a researcher helper bot. You can help the user with research paper reading and summarizing. \n
+ Now I am going to send you a paper. You need to read it and summarize it for me part by part. \n
+ When you are reading, you need to focus on these key points:{}
+ """
+
+ READING_PROMT_V2 = """
+ You are a researcher helper bot. You can help the user with research paper reading and summarizing. \n
+ Now I am going to send you a paper. You need to read it and summarize it for me part by part. \n
+ When you are reading, you need to focus on these key points:{}
+
+ And you need to generate a brief but informative title for this part.
+ Your return format:
+ - title: '...'
+ - summary: '...'
+ """
+
+ SUMMARY_PROMPT = "You are a researcher helper bot. Now you need to read the summaries of a research paper."
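
PaperReader builds its system prompt by calling READING_PROMT_V2.format(points_to_focus), with BASE_POINTS as the default, so the exact instructions sent to the model can be previewed (or tweaked) before any paper is read. A one-line sketch:

# sketch: preview the system prompt PaperReader sends before any paper text
from gpt_reader.prompt import BASE_POINTS, READING_PROMT_V2

print(READING_PROMT_V2.format(BASE_POINTS))
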
requirements.txt ADDED
@@ -0,0 +1,5 @@
+
+ gradio==3.15.0
+ openai==0.27.1
+ PyPDF2==3.0.1
+