# Spaces status: Runtime error
import gradio as gr | |
from ctransformers import AutoModelForCausalLM | |
# Select the checkpoint to serve; the commented-out ids are the alternatives.
# model_id = "TheBloke/Llama-2-7B-GGML"
model_id = "TheBloke/Llama-2-7B-chat-GGML"
# model_id = "TheBloke/Llama-2-13B-GGML"
# model_id = "TheBloke/Llama-2-13B-chat-GGML"

# Value passed as gpu_layers below: 130 for a 13B checkpoint, 110 otherwise.
if '13B' in model_id:
    gpu_layers = 130
else:
    gpu_layers = 110

# Generation settings, forwarded as keyword arguments to from_pretrained.
config = {
    'max_new_tokens': 256,
    'repetition_penalty': 1.1,
    'temperature': 0.1,
    'stream': True,
}

# Build the model instance once, at module import time.
llm = AutoModelForCausalLM.from_pretrained(
    model_id,
    model_type="llama",
    # lib='avx2',  # for CPU use
    gpu_layers=gpu_layers,
    **config,
)
def predict(prompt):
    """Send ``prompt`` to the module-level ``llm`` and return its result.

    The prompt is forwarded unchanged (no system prompt is prepended), and
    ``stream=False`` is passed explicitly on the call.
    """
    return llm(prompt, stream=False)
# Text-in / text-out Gradio interface around predict, launched immediately.
demo = gr.Interface(fn=predict, inputs='text', outputs='text')
demo.launch()