Spaces:

camanalo1
/

Test-Phi3

Runtime error

App Files Community

camanalo1 committed on May 1, 2024

Commit

67a004a

•

1 Parent(s): 797a570

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -65

app.py CHANGED Viewed

@@ -1,70 +1,17 @@
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-import os
-from threading import Thread
-import spaces
-token = os.environ["HF_TOKEN"]
-# Load the LLM model and tokenizer
-model = AutoModelForCausalLM.from_pretrained(
-    "microsoft/Phi-3-mini-4k-instruct", token=token, trust_remote_code=True
-)
-tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", token=token)
-terminators = [tok.eos_token_id]
-# Set device
-if torch.cuda.is_available():
-    device = torch.device("cuda")
-    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
-else:
-    device = torch.device("cpu")
-    print("Using CPU")
-model = model.to(device)
-@spaces.GPU(duration=60)
-def chat(message):
-    # Initialize chat history with the user's message
-    chat = [{"role": "user", "content": message}]
-    # Convert chat history to a format suitable for the model
-    messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-    # Tokenize the messages and move them to the appropriate device
-    model_inputs = tok([messages], return_tensors="pt").to(device)
-    # Initialize a TextIteratorStreamer for dynamic generation
-    streamer = TextIteratorStreamer(
-        tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
-    )
-    # Set generation parameters
-    generate_kwargs = {
-        **model_inputs,
-        "streamer": streamer,
-        "max_new_tokens": 256,  # You can adjust this value if needed
-        "do_sample": True,
-        "temperature": 0.9,
-        "eos_token_id": terminators,
-    }
-    # Start model generation in a separate thread
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
-    partial_text = ""
-    # Yield partially generated text until generation is complete
-    for new_text in streamer:
-        partial_text += new_text
-        yield partial_text
-    # Yield the final generated text
-    yield partial_text
-demo = gr.Interface(
-    fn=chat,
-    inputs="text",
-    outputs=gr.Textbox(lines=5, label="Generated Text"),
-    title="Chat With LLMs",
-    description="Now Running [microsoft/Phi-3-mini-4k-instruct](https://huggingface.com/microsoft/Phi-3-mini-4k-instruct)",
-)
-demo.launch()

 import gradio as gr
+from transformers import pipeline
+# Build the text-generation pipeline once at module load so every call to generate_text reuses it
+generator = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
+# Return the generated text for a single prompt string, using the module-level `generator` pipeline
+def generate_text(prompt):
+    # Cap only the newly generated tokens: max_length would also count the prompt, leaving long prompts no room for output
+    generated_text = generator(prompt, max_new_tokens=100, num_return_sequences=1)[0]['generated_text']
+    return generated_text
+# Build the Gradio UI (5-line prompt box in, generated-text box out) around generate_text and launch it
+prompt_input = gr.Textbox(lines=5, label="Input Prompt")
+output_text = gr.Textbox(label="Generated Text")
+gr.Interface(generate_text, prompt_input, output_text, title="Conversational AI", description="Engage in conversation with our AI.").launch()