Spaces:
Runtime error
Runtime error
File size: 9,419 Bytes
b5a340a a64d886 b5a340a 265b836 e0cbd02 b5a340a 8fc0ebc b5a340a 4e9b859 630579b 4e9b859 630579b 4e9b859 630579b 4e9b859 630579b 4e9b859 630579b 4e9b859 b5a340a dc20729 b5a340a dc20729 b5a340a 7a6b087 a65fb79 b5a340a 7a6b087 8fc0ebc b5a340a 8fc0ebc b5a340a 7fef2b8 aaef1cf 2f9b13c aaef1cf b5a340a bb1395f 4a9fb3e b5a340a 7fef2b8 b469dbc 7fef2b8 dc20729 7fef2b8 619c177 497a3b3 b5a340a 34c5a9e 7fef2b8 8fc0ebc 4e9b859 a5a91e5 8fc0ebc e0cbd02 180a6c8 54d17c2 941d4b9 54d17c2 941d4b9 54d17c2 93c03f7 b5a340a dc20729 b5a340a a7c59f6 b5a340a bd2513f b5a340a dc20729 b5a340a 338efa3 4d9539a b5a340a 3a3994d b5a340a 8f02c1e b5a340a 2077720 334c97a b5a340a 334c97a b5a340a 334c97a b5a340a cad1aac c28b703 76f32c5 1c13b2d 67ff96d 9778459 b5a340a 3a3994d b5a340a dc20729 6a58114 2ccfa82 b5a340a 33e0c02 b5a340a 31b5387 702662c b5a340a 2ccfa82 b5a340a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
import numpy as np
import os
import re
import jieba
from io import BytesIO
import datetime
import time
import openai, tenacity
import argparse
import configparser
import json
import tiktoken
import PyPDF2
import gradio
def contains_chinese(text):
    """Tell whether ``text`` contains at least one Chinese character.

    A character counts as Chinese when its code point lies in the range
    U+4E00..U+9FFF (inclusive); no other ranges are checked.

    Args:
        text: The string to scan.

    Returns:
        True if any character falls inside that range, otherwise False
        (including for an empty string).
    """
    return any('\u4e00' <= char <= '\u9fff' for char in text)
def insert_sentence(text, sentence, interval):
    """Insert ``sentence`` after every ``interval`` words of each line.

    The text is processed line by line (split on ``'\\n'``), and the word
    counter restarts on every line.  Lines for which ``contains_chinese``
    is true are segmented with ``jieba.cut`` and re-joined without a
    separator; all other lines are split on whitespace and re-joined with
    single spaces.

    Args:
        text: The text to annotate.
        sentence: The marker inserted into the word stream.
        interval: Number of words between two insertions.

    Returns:
        The rebuilt text, with lines joined by ``'\\n'``.
    """
    rebuilt = []
    for raw_line in text.split('\n'):
        if contains_chinese(raw_line):
            tokens, glue = list(jieba.cut(raw_line)), ''
        else:
            tokens, glue = raw_line.split(), ' '

        pieces = []
        # 1-based position, so the marker lands after every `interval`-th word.
        for position, token in enumerate(tokens, start=1):
            pieces.append(token)
            if position % interval == 0:
                pieces.append(sentence)
        rebuilt.append(glue.join(pieces))
    return '\n'.join(rebuilt)
# Reviewer: turns an uploaded paper PDF into a ChatGPT-written review
class Reviewer:
    """Produce a review of a paper PDF through the OpenAI chat API.

    Attributes are set once in ``__init__`` and read by the other methods:
    the API key and base URL, the requested review format, the raw PDF
    bytes, the output language, a token budget and a tiktoken encoding used
    to estimate how much of the paper fits into that budget.
    """

    def __init__(self, api, api_base, review_format, paper_pdf, language):
        """Store the request configuration.

        Args:
            api: OpenAI API key.
            api_base: Base URL of the API endpoint (e.g. a relay); applied
                to the ``openai`` module in ``chat_review`` when non-empty.
            review_format: Text describing the required review structure.
            paper_pdf: PDF content as bytes (it is wrapped in ``BytesIO``).
            language: Name of the language the review should be written in.
        """
        self.api = api
        self.review_format = review_format
        self.api_base = api_base
        self.language = language
        self.paper_pdf = paper_pdf
        # Token budget used when deciding how much paper text to send.
        self.max_token_num = 12000
        self.encoding = tiktoken.get_encoding("gpt2")

    def review_by_chatgpt(self, paper_list):
        """Extract the paper text and request a review for it.

        Args:
            paper_list: Unused; the PDF stored on the instance is reviewed.

        Returns:
            A ``(review_text, total_tokens_used)`` tuple as returned by
            ``chat_review``.
        """
        text = self.extract_chapter(self.paper_pdf)
        chat_review_text, total_token_used = self.chat_review(text=text)
        return chat_review_text, total_token_used

    # NOTE: exceptions raised by the API call are caught inside the method
    # and turned into an error message, so this decorator does not retry them.
    @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                    stop=tenacity.stop_after_attempt(5),
                    reraise=True)
    def chat_review(self, text):
        """Send the (truncated) paper text to the chat model.

        Args:
            text: Full text extracted from the paper.

        Returns:
            A ``(result, usage)`` tuple.  On success ``result`` is the
            watermarked review followed by an ethics statement and
            ``usage`` is the total token count reported by the API.  On
            failure ``result`` is an error message and ``usage`` is the
            placeholder string ``'xxxxx'``.
        """
        openai.api_key = self.api  # API key supplied by the caller
        # Honour the caller-supplied endpoint; it was previously stored but
        # never applied, so a relay URL had no effect.
        if self.api_base:
            openai.api_base = self.api_base

        review_prompt_token = 1000
        try:
            text_token = len(self.encoding.encode(text))
        except Exception:
            # Tokenisation failed; fall back to a fixed estimate.
            text_token = 13000

        # Keep a prefix of the text proportional to the share of tokens that
        # fits in the budget (slicing past the end simply keeps everything).
        input_text_index = int(len(text) * (self.max_token_num - review_prompt_token) / (text_token + 1))
        input_text = "This is the paper for your review:" + text[:input_text_index]
        messages = [
            {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:" + self.review_format + "Be sure to use {} answers".format(self.language)},
            {"role": "user", "content": input_text + " Translate the output into {}.".format(self.language)},
        ]
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0.5
            )
            result = ''.join(choice.message.content for choice in response.choices)
            # Watermark the review every 50 words.
            result = insert_sentence(result, '**Generated by ChatGPT, no copying allowed!**', 50)
            result += "\n\n⚠伦理声明/Ethics statement:\n--禁止直接复制生成的评论用于任何论文审稿工作!\n--Direct copying of generated comments for any paper review work is prohibited!"
            usage = response.usage.total_tokens
        except Exception as e:
            # Report any failure to the user instead of raising.
            result = "⚠:非常抱歉>_<,生了一个错误:" + str(e)
            usage = 'xxxxx'
        print("********" * 10)
        print(result)
        print("********" * 10)
        return result, usage

    def extract_chapter(self, pdf_path):
        """Return the concatenated text of every page of the PDF.

        Args:
            pdf_path: The PDF content as bytes (despite the name, not a
                filesystem path).

        Returns:
            The text of all pages joined in page order; a page that yields
            no text contributes an empty string.
        """
        pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_path))
        # The former start/stop bookkeeping never skipped or stopped on any
        # page, so every page is extracted.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def main(api, api_base, review_format, paper_pdf, language):
    """Run one review request and format the two UI outputs.

    Args:
        api: OpenAI API key.
        api_base: Base URL forwarded to ``Reviewer``.
        review_format: Required review structure.
        paper_pdf: Uploaded PDF content.
        language: Output language name.

    Returns:
        A ``(comments, stats)`` tuple.  When the key, the format or the PDF
        is missing, both elements are the same warning message; otherwise
        ``comments`` is the review text and ``stats`` reports the token
        count and the elapsed time in seconds.
    """
    start_time = time.time()

    # Guard clause: nothing to do without key, format and PDF.
    if not (api and review_format and paper_pdf):
        warning = "⚠:API-key或审稿要求或论文pdf未输入!请检测!"
        return warning, warning

    reviewer1 = Reviewer(api, api_base, review_format, paper_pdf, language)
    comments, total_token_used = reviewer1.review_by_chatgpt(paper_list=paper_pdf)

    time_used = time.time() - start_time
    output2 = "使用token数:" + str(total_token_used) + "\n花费时间:" + str(round(time_used, 2)) + "秒"
    return comments, output2
########################################################################################################
# Page title
title = "🤖ChatReviewer🤖"
# Page description (HTML/Markdown shown above the form)
description = '''<div align='left'>
<img align='right' src='http://i.imgtg.com/2023/03/22/94PLN.png' width="220">
<strong>ChatReviewer是一款基于ChatGPT-3.5的API开发的智能论文分析与建议助手。</strong>其用途如下:
⭐️对论文的优缺点进行快速总结和分析,提高科研人员的文献阅读和理解的效率,紧跟研究前沿。
⭐️对自己的论文进行分析,根据ChatReviewer生成的改进建议进行查漏补缺,进一步提高自己的论文质量。
如果觉得很卡,可以点击右上角的Duplicate this Space,把ChatReviewer复制到你自己的Space中!(🈲:禁止直接复制生成的评论用于任何论文审稿工作!)
本项目的[Github](https://github.com/nishiwen1214/ChatReviewer),欢迎Star和Fork,也欢迎大佬赞助让本项目快速成长!💗
</div>
'''
# Build the Gradio interface.  The inputs use the top-level component classes
# (same as the outputs) instead of the deprecated ``gradio.inputs`` namespace;
# their order matches the parameters of ``main``.
inp = [
    # API key, masked while typing
    gradio.Textbox(label="请输入你的API-key(sk开头的字符串)",
                   value="",
                   type='password'),
    # Base URL of the API endpoint / relay
    gradio.Textbox(label="请输入第三方中转网址",
                   value="https://api.openai.com/v1"),
    # Review requirements and output format
    gradio.Textbox(lines=5,
                   label="请输入特定的分析要求和格式(否则为默认格式)",
                   value="""* Overall Review
Please briefly summarize the main points and contributions of this paper.
xxx
* Paper Strength
Please provide a list of the strengths of this paper, including but not limited to: innovative and practical methodology, insightful empirical findings or in-depth theoretical analysis,
well-structured review of relevant literature, and any other factors that may make the paper valuable to readers. (Maximum length: 2,000 characters)
(1) xxx
(2) xxx
(3) xxx
* Paper Weakness
Please provide a numbered list of your main concerns regarding this paper (so authors could respond to the concerns individually).
These may include, but are not limited to: inadequate implementation details for reproducing the study, limited evaluation and ablation studies for the proposed method,
correctness of the theoretical analysis or experimental results, lack of comparisons or discussions with widely-known baselines in the field, lack of clarity in exposition,
or any other factors that may impede the reader's understanding or benefit from the paper. Please kindly refrain from providing a general assessment of the paper's novelty without providing detailed explanations. (Maximum length: 2,000 characters)
(1) xxx
(2) xxx
(3) xxx
* Questions To Authors And Suggestions For Rebuttal
Please provide a numbered list of specific and clear questions that pertain to the details of the proposed method, evaluation setting, or additional results that would aid in supporting the authors' claims.
The questions should be formulated in a manner that, after the authors have answered them during the rebuttal, it would enable a more thorough assessment of the paper's quality. (Maximum length: 2,000 characters)
*Overall score (1-10)
The paper is scored on a scale of 1-10, with 10 being the full mark, and 6 stands for borderline accept. Then give the reason for your rating.
xxx"""
                   ),
    # The upload is delivered to ``main`` as raw bytes
    gradio.File(label="请上传论文PDF文件(请务必等pdf上传完成后再点击Submit!)", type="binary"),
    # Output language; the chosen name is inserted verbatim into the prompt
    gradio.Radio(choices=["English", "Chinese", "French", "German", "Japanese"],
                 value="English",
                 label="选择输出语言"),
]
chat_reviewer_gui = gradio.Interface(fn=main,
                                     inputs=inp,
                                     outputs=[gradio.Textbox(lines=25, label="分析结果"), gradio.Textbox(lines=2, label="资源统计")],
                                     title=title,
                                     description=description)
# Start server
chat_reviewer_gui.launch(quiet=True, show_api=False)