# gradio_sandbox / app.py
import gradio as gr
from ctransformers import AutoModelForCausalLM
# choose your champion
#model_id = "TheBloke/Llama-2-7B-GGML"
model_id = "TheBloke/Llama-2-7B-chat-GGML"
#model_id = "TheBloke/Llama-2-13B-GGML"
#model_id = "TheBloke/Llama-2-13B-chat-GGML"
# model loading / generation settings
gpu_layers = 130 if '13B' in model_id else 110  # offload more layers for the larger 13B models
config = {'max_new_tokens': 256, 'repetition_penalty': 1.1, 'temperature': 0.1, 'stream': True}
# get llm instance
llm = AutoModelForCausalLM.from_pretrained(
    model_id,
    model_type="llama",
    #lib='avx2',  # uncomment for CPU-only use
    gpu_layers=gpu_layers,  # 110 for 7B, 130 for 13B
    **config,
)
def predict(prompt):
    # build the prompt (a chat-template wrapper is sketched below this function)
    #system_prompt = """
    #"""
    # run it through the model; stream=False returns the full completion at once
    res = llm(prompt, stream=False)
    return res
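
# The "-chat" GGML variants were fine-tuned on Llama-2's [INST]/<<SYS>> chat template,
# so bare prompts may behave better when wrapped in it. A minimal sketch, assuming the
# standard Llama-2 chat format; the helper name and default system prompt below are
# illustrative additions, not part of the original app.
def format_chat_prompt(user_prompt, system_prompt="You are a helpful assistant."):
    # Llama-2 chat template: [INST] <<SYS>> {system} <</SYS>> {user} [/INST]
    return f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_prompt} [/INST]"

# ctransformers can also yield text incrementally when called with stream=True, which
# Gradio can display as it arrives if the callback is a generator (queuing enabled at
# launch). Another sketch; predict_stream is an assumed name, not part of the original app.
def predict_stream(prompt):
    partial = ""
    for chunk in llm(format_chat_prompt(prompt), stream=True):
        partial += chunk
        yield partial  # each yielded value replaces the output textbox contents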
demo = gr.Interface(
    fn=predict,
    inputs='text',
    outputs='text',
)

demo.launch()
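
# To try the streaming sketch above instead, one could wire up the generator version
# (assumes a Gradio release with generator/queue support):
#   demo = gr.Interface(fn=predict_stream, inputs='text', outputs='text')
#   demo.queue().launch()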