import gradio as gr
from huggingface_hub import InferenceClient

# Client for a privately hosted TGI endpoint; the bearer token authenticates each request.
client = InferenceClient(
    model="https://085e2ffe-b85f-4b3d-a764-3ee4f8b84f93.job.console.elementai.com",
    headers={"Authorization": "Bearer lBM9eFdmX943rDzDTi2YHw:oCGJZvu9JAhgolNNC8Fbb0QBp4nsUGK4FbCyqUy6lT8"},
)

def inference(message, history):
    # Stream tokens from the endpoint, yielding the accumulated reply so the chat UI updates incrementally.
    partial_message = ""
    for token in client.text_generation(message, max_new_tokens=16393, stream=True):
        partial_message += token
        yield partial_message
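
# Optional sketch (not part of the original Space): the description below notes that this "chat"
# does not keep context. One way to keep it would be to fold the Gradio history (a list of
# [user, assistant] pairs) into the prompt before calling the endpoint. The plain
# "User:/Assistant:" template here is an assumption, not CodeLlama's official chat format.
def inference_with_history(message, history):
    prompt = ""
    for user_turn, assistant_turn in history:
        prompt += f"User: {user_turn}\nAssistant: {assistant_turn}\n"
    prompt += f"User: {message}\nAssistant:"
    partial_message = ""
    for token in client.text_generation(prompt, max_new_tokens=1024, stream=True):
        partial_message += token
        yield partial_message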

gr.ChatInterface(
    inference,
    chatbot=gr.Chatbot(height=700),
    textbox=gr.Textbox(placeholder="Prompt CodeLlama model", container=False, scale=2),
    description="This is the CodeLlama-34b-Instruct-hf model. Note that this \"chat\" does not keep context. Each message is a separate prompt.",
    title="ATG 🤗 TGI",
    examples=["write a Python function to add 2 numbers", "write a JavaScript function that logs the current time"],
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
).queue().launch(share=True)