Spaces:

merve
/

chameleon-7b

Running

App Files Files Community

sadzxctv committed on Jul 31

Commit

94e996e

•

1 Parent(s): 80fa0a9

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -90

app.py CHANGED Viewed

@@ -1,94 +1,112 @@
-from transformers import ChameleonProcessor, ChameleonForConditionalGeneration, TextIteratorStreamer, BitsAndBytesConfig
-import torch
-from PIL import Image
-import requests
 import spaces
-from threading import Thread
 import gradio as gr
-from gradio import FileData
-import time
-processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
-model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.float16).to("cuda")
-@spaces.GPU
-def bot_streaming(message, history):
-  txt = message.text
-  ext_buffer = f"{txt}"
-  if message.files:
-    if len(message.files) == 1:
-      image = [message.files[0].path]
-    # interleaved images or video
-    elif len(message.files) > 1:
-      image = [msg.path for msg in message.files]
-  else:
-    def has_file_data(lst):
-      return any(isinstance(item, FileData) for sublist in lst if isinstance(sublist, tuple) for item in sublist)
-    def extract_paths(lst):
-        return [item.path for sublist in lst if isinstance(sublist, tuple) for item in sublist if isinstance(item, FileData)]
-    latest_text_only_index = -1
-    for i, item in enumerate(history):
-        if all(isinstance(sub_item, str) for sub_item in item):
-            latest_text_only_index = i
-    image = [path for i, item in enumerate(history) if i < latest_text_only_index and has_file_data(item) for path in extract_paths(item)]
-  if message.files is None:
-      gr.Error("You need to upload an image or video for LLaVA to work.")
-  image_extensions = Image.registered_extensions()
-  image_extensions = tuple([ex for ex, f in image_extensions.items()])
-  if len(image) == 1:
-      image = Image.open(image[0]).convert("RGB")
-      prompt = f"{message.text}<image>"
-  elif len(image) > 1:
-    image_list = []
-    user_prompt = message.text
-    for img in image:
-      img = Image.open(img).convert("RGB")
-      image_list.append(img)
-    toks = "<image>" * len(image_list)
-    prompt = user_prompt + toks
-    image = image_list
-  inputs = processor(prompt, image, return_tensors="pt").to("cuda", torch.float16)
-  streamer = TextIteratorStreamer(processor, {"skip_special_tokens": True})
-  generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=250)
-  generated_text = ""
-  thread = Thread(target=model.generate, kwargs=generation_kwargs)
-  thread.start()
-  buffer = ""
-  for new_text in streamer:
-    buffer += new_text
-    generated_text_without_prompt = buffer#[len(ext_buffer):]
-    time.sleep(0.01)
-    yield buffer
-demo = gr.ChatInterface(fn=bot_streaming, title="Chameleon 🦎", examples=[
-    {"text": "Where to find this monument? Can you give me other recommendations around the area?", "files":["./wat_arun.jpg"]},
-    {"text": "Do these two pieces belong to the same era and if so, which era is it?", "files":["./rococo_1.jpg","./rococo_2.jpg"]},
-    {"text": "What art style is this and which century?", "files":["./rococo_1.jpg"]},
-    {"text": "What is on the flower?", "files":["./bee.jpg"]}],
-      textbox=gr.MultimodalTextbox(file_count="multiple"),
-      description="Try [Chameleon-7B](https://huggingface.co/facebook/chameleon-7b) by Meta with transformers in this demo. Upload image(s), and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error. ",
-      stop_btn="Stop Generation", multimodal=True)
-demo.launch(debug=True)

 import spaces
+import json
+import subprocess
+from llama_cpp import Llama
+from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
+from llama_cpp_agent.providers import LlamaCppPythonProvider
+from llama_cpp_agent.chat_history import BasicChatHistory
+from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
+from huggingface_hub import hf_hub_download
+# Download the Sakura-32B GGUF model file into ./models when this module is loaded
+hf_hub_download(
+    repo_id="SakuraLLM/Sakura-32B-Qwen2beta-v0.10pre1-GGUF",
+    filename="sakura-32b-qwen2beta-v0.10pre1-q4km.gguf",
+    local_dir="./models"
+)
+llm = None  # Llama instance; created lazily inside respond() and reused across calls
+llm_model = None  # filename of the model currently held in llm
+@spaces.GPU(duration=120)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    model='sakura-32b-qwen2beta-v0.10pre1-q4km.gguf'
+):
+    chat_template = MessagesFormatterType.GEMMA_2
+    system_message = "你是一个轻小说翻译模型，可以流畅通顺地使用给定的术语表以日本轻小说的风格将日文翻译成简体中文，并联系上下文正确使用人称代词，注意不要混淆使役态和被动态的主语和宾语，不要擅自添加原文中没有的代词，也不要擅自增加或减少换行。"
+    global llm
+    global llm_model
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=8192,
+        )
+        llm_model = model
+    provider = LlamaCppPythonProvider(llm)
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True
+    )
+    settings = provider.get_provider_default_settings()
+    settings.temperature = 0.1
+    settings.top_p = 0.3
+    settings.do_sample = True
+    settings.num_beams = 1
+    settings.repetition_penalty = 1
+    settings.max_new_tokens = 512
+    settings.min_new_tokens = 1
+    settings.stream = True
+    messages = BasicChatHistory()
+    for msn in history:
+        user = {
+            'role': Roles.user,
+            'content': msn[0]
+        }
+        assistant = {
+            'role': Roles.assistant,
+            'content': msn[1]
+        }
+        messages.add_message(user)
+        messages.add_message(assistant)
+    stream = agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False
+    )
+    outputs = ""
+    for output in stream:
+        outputs += output
+        yield outputs
+description = """<p align="center">Defaults to Sakura-32B-Qwen2beta</p>
+<p><center>
+<a href="https://huggingface.co/SakuraLLM/Sakura-32B-Qwen2beta-v0.10pre1-GGUF" target="_blank">[Sakura-32B-Qwen2beta Model]</a>
+</center></p>
+"""  # HTML passed as the ChatInterface description; links to the model repository
+demo = gr.ChatInterface(  # chat UI driven by the streaming respond() generator
+    respond,
+    retry_btn="Retry",
+    undo_btn="Undo",
+    clear_btn="Clear",
+    submit_btn="Send",
+    title="Chat with Sakura 32B using llama.cpp",
+    description=description,
+    chatbot=gr.Chatbot(
+        scale=1,
+        likeable=False,
+        show_copy_button=True
+    )
+)
+if __name__ == "__main__":  # launch the UI only when executed as a script
+    demo.launch()