import numpy as np import os import re import datetime import time import openai, tenacity import argparse import configparser import json import tiktoken import PyPDF2 import gradio # 定义Reviewer类 class Reviewer: # 初始化方法,设置属性 def __init__(self, api, review_format, paper_pdf, language): self.api = api self.review_format = review_format self.language = language self.max_token_num = 4097 self.encoding = tiktoken.get_encoding("gpt2") def review_by_chatgpt(self, paper_list): text = extract_chapter(paper_list) chat_review_text, total_token_used = self.chat_review(text=text) return chat_review_text, total_token_used @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5), reraise=True) def chat_review(self, text): openai.api_key = self.api # 读取api review_prompt_token = 1000 text_token = len(self.encoding.encode(text)) input_text_index = int(len(text)*(self.max_token_num-review_prompt_token)/text_token) input_text = "This is the paper for your review:" + text[:input_text_index] messages=[ {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"+ self.review_format +" Must be output in {}.".format(self.language)}, {"role": "user", "content": input_text}, ] response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=messages, ) result = '' for choice in response.choices: result += choice.message.content print("********"*10) print(result) print("********"*10) print("prompt_token_used:", response.usage.prompt_tokens) print("completion_token_used:", response.usage.completion_tokens) print("total_token_used:", response.usage.total_tokens) print("response_time:", response.response_ms/1000.0, 's') return result, response.usage.total_tokens def extract_chapter(self, pdf_path): with open(pdf_path, 'rb') as file: # 创建一个PDF阅读器对象 pdf_reader = PyPDF2.PdfReader(file) # 获取PDF的总页数 num_pages = len(pdf_reader.pages) # 初始化提取状态和提取文本 extraction_started = False extracted_text = "" # 遍历PDF中的每一页 for page_number in range(num_pages): page = pdf_reader.pages[page_number] page_text = page.extract_text() # 如果找到了章节标题,开始提取 if 'Abstract'.lower() in page_text.lower() and not extraction_started: extraction_started = True page_number_start = page_number # 如果提取已开始,将页面文本添加到提取文本中 if extraction_started: extracted_text += page_text # 如果找到下一章节标题,停止提取 if page_number_start + 1 < page_number: break return extracted_text def main(api, review_format, paper_pdf, language): start_time = time.time() if not api or not review_format or not paper_pdf: return "请输入完整内容!" # 判断PDF文件 else: paper_list = paper_pdf # 创建一个Reader对象 reviewer1 = Reviewer(api, review_format, paper_pdf, language) # 开始判断是路径还是文件: comments, total_token_used = reviewer1.review_by_chatgpt(paper_list=paper_list) time_used = time.time() - start_time output2 ="使用token数:"+ str(total_token_used)+"\n花费时间:"+ str(round(time_used, 2)) +"秒" return comments, output2 ######################################################################################################## # 标题 title = "🤖ChatReviewer🤖" # 描述 description = '''