# Hugging Face Spaces page header captured with this source (status shown: "Runtime error").
import gradio as gr | |
from arxiv2text import arxiv_to_text | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
def get_model(model_url="thepowerfuldeez/Qwen2-1.5B-Summarize", use_cpu=False):
    """Load the summarization model together with its tokenizer.

    Args:
        model_url: Hub id or local path handed to
            ``AutoModelForCausalLM.from_pretrained``.
        use_cpu: When true, load the weights unquantized onto the CPU.
            Otherwise load them 4-bit quantized (bfloat16 compute dtype)
            with FlashAttention-2 enabled.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # The tokenizer is always taken from the base instruct checkpoint,
    # independent of ``model_url``.
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")

    if use_cpu:
        # FlashAttention-2 only runs on CUDA devices; requesting it for a
        # CPU-placed model makes from_pretrained raise. Leave the attention
        # implementation unset so transformers picks one that works on CPU.
        model = AutoModelForCausalLM.from_pretrained(
            model_url,
            device_map="cpu",
        )
    else:
        # Imported here so the CPU path does not need the quantization extras.
        from transformers import BitsAndBytesConfig

        # Passing load_in_4bit / bnb_4bit_* directly to from_pretrained is
        # deprecated; the supported route is an explicit quantization config.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_url,
            quantization_config=quantization_config,
            attn_implementation="flash_attention_2",
        )
    return model, tokenizer
def call_llm(model, tokenizer, text):
    """Send ``text`` to the model as a single user turn and return its reply.

    Args:
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Tokenizer exposing ``apply_chat_template`` and ``decode``.
        text: Content of the user message.

    Returns:
        The decoded continuation (at most 512 new tokens), with the prompt
        tokens and special tokens removed.
    """
    messages = [
        {"role": "system", "content": "You are helpful AI assistant."},
        {"role": "user", "content": text},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    # The template returns the tensor on the default device; generate() needs
    # it on the same device as the model weights, otherwise it fails as soon
    # as the model lives on a GPU.
    input_ids = input_ids.to(model.device)

    prompt_len = input_ids.shape[-1]
    generated = model.generate(input_ids, max_new_tokens=512)
    # generate() returns prompt + continuation; keep only the continuation.
    new_tokens = generated[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Loaded once at import time (CPU configuration) and shared by every request.
model, tokenizer = get_model(use_cpu=True)


def summarize_pdf(pdf_url):
    """Return a model-written summary of the paper behind ``pdf_url``.

    The text is obtained with ``arxiv_to_text``; only its first 71000
    characters are placed into the prompt handed to ``call_llm``.
    """
    paper_text = arxiv_to_text(pdf_url)
    clipped = paper_text[:71000]
    prompt = f"Summarize following text: {clipped}"
    return call_llm(model, tokenizer, prompt)
# One text box in, one text box out, backed by summarize_pdf.
interface = gr.Interface(
    summarize_pdf,
    "text",
    "text",
    title="Arxiv PDF Summarizer",
    description="Enter the URL of an Arxiv PDF to get a summary.",
)

# Start the Gradio server for the app.
interface.launch()