# Load model directly import streamlit as st from transformers import AutoModel model,tokenizer = AutoModel.from_pretrained("shivam9980/mistral-7b-news") alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. ### Instruction: {} ### Input: {} ### Response: {}""" content = st.text_input('Content') inputs = tokenizer( [ alpaca_prompt.format( "The following passage is content from a news report. Please summarize this passage in one sentence or less.", # instruction content, # input "", # output - leave this blank for generation! ) ], return_tensors = "pt").to("cuda") outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True) results= tokenizer.batch_decode(outputs) st.write(results)