sadzxctv committed
Commit 94e996e
1 Parent(s): 80fa0a9

Update app.py

Files changed (1)
  1. app.py +108 -90
app.py CHANGED
@@ -1,94 +1,112 @@
- from transformers import ChameleonProcessor, ChameleonForConditionalGeneration, TextIteratorStreamer, BitsAndBytesConfig
- import torch
- from PIL import Image
- import requests
  import spaces
- from threading import Thread
  import gradio as gr
- from gradio import FileData
- import time
-
- processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")
-
- model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.float16).to("cuda")
-
- @spaces.GPU
- def bot_streaming(message, history):
-     txt = message.text
-     ext_buffer = f"{txt}"
-
-     if message.files:
-         if len(message.files) == 1:
-             image = [message.files[0].path]
-         # interleaved images or video
-         elif len(message.files) > 1:
-             image = [msg.path for msg in message.files]
-     else:
-         def has_file_data(lst):
-             return any(isinstance(item, FileData) for sublist in lst if isinstance(sublist, tuple) for item in sublist)
-
-         def extract_paths(lst):
-             return [item.path for sublist in lst if isinstance(sublist, tuple) for item in sublist if isinstance(item, FileData)]
-
-         latest_text_only_index = -1
-
-         for i, item in enumerate(history):
-             if all(isinstance(sub_item, str) for sub_item in item):
-                 latest_text_only_index = i
-
-         image = [path for i, item in enumerate(history) if i < latest_text_only_index and has_file_data(item) for path in extract_paths(item)]
-
-     if message.files is None:
-         gr.Error("You need to upload an image or video for LLaVA to work.")
-
-     image_extensions = Image.registered_extensions()
-     image_extensions = tuple([ex for ex, f in image_extensions.items()])
-     if len(image) == 1:
-         image = Image.open(image[0]).convert("RGB")
-         prompt = f"{message.text}<image>"
-     elif len(image) > 1:
-         image_list = []
-         user_prompt = message.text
-
-         for img in image:
-             img = Image.open(img).convert("RGB")
-             image_list.append(img)
-
-         toks = "<image>" * len(image_list)
-         prompt = user_prompt + toks
-
-         image = image_list
-
-     inputs = processor(prompt, image, return_tensors="pt").to("cuda", torch.float16)
-     streamer = TextIteratorStreamer(processor, {"skip_special_tokens": True})
-     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=250)
-     generated_text = ""
-
-     thread = Thread(target=model.generate, kwargs=generation_kwargs)
-     thread.start()
-
-     buffer = ""
-     for new_text in streamer:
-         buffer += new_text
-         generated_text_without_prompt = buffer  # [len(ext_buffer):]
-         time.sleep(0.01)
-         yield buffer
-
- demo = gr.ChatInterface(fn=bot_streaming, title="Chameleon 🦎", examples=[
-     {"text": "Where to find this monument? Can you give me other recommendations around the area?", "files": ["./wat_arun.jpg"]},
-     {"text": "Do these two pieces belong to the same era and if so, which era is it?", "files": ["./rococo_1.jpg", "./rococo_2.jpg"]},
-     {"text": "What art style is this and which century?", "files": ["./rococo_1.jpg"]},
-     {"text": "What is on the flower?", "files": ["./bee.jpg"]}],
-     textbox=gr.MultimodalTextbox(file_count="multiple"),
-     description="Try [Chameleon-7B](https://huggingface.co/facebook/chameleon-7b) by Meta with transformers in this demo. Upload image(s), and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
-     stop_btn="Stop Generation", multimodal=True)
- demo.launch(debug=True)
  import spaces
+ import json
+ import subprocess
+ from llama_cpp import Llama
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
+ from llama_cpp_agent.chat_history import BasicChatHistory
+ from llama_cpp_agent.chat_history.messages import Roles
  import gradio as gr
+ from huggingface_hub import hf_hub_download
+
+ # Download the Sakura-32B model
+ hf_hub_download(
+     repo_id="SakuraLLM/Sakura-32B-Qwen2beta-v0.10pre1-GGUF",
+     filename="sakura-32b-qwen2beta-v0.10pre1-q4km.gguf",
+     local_dir="./models"
+ )
+
+ llm = None
+ llm_model = None
+
+ @spaces.GPU(duration=120)
+ def respond(
+     message,
+     history: list[tuple[str, str]],
+     model='sakura-32b-qwen2beta-v0.10pre1-q4km.gguf'
+ ):
+     chat_template = MessagesFormatterType.GEMMA_2
+     # The Chinese system prompt below says: "You are a light-novel translation model. Using the given glossary, translate Japanese into fluent Simplified Chinese in the style of a Japanese light novel. Use personal pronouns correctly based on context, take care not to confuse the subject and object of causative and passive forms, do not add pronouns that are absent from the source, and do not add or remove line breaks."
+     system_message = "你是一个轻小说翻译模型,可以流畅通顺地使用给定的术语表以日本轻小说的风格将日文翻译成简体中文,并联系上下文正确使用人称代词,注意不要混淆使役态和被动态的主语和宾语,不要擅自添加原文中没有的代词,也不要擅自增加或减少换行。"
+
+     global llm
+     global llm_model
+
+     if llm is None or llm_model != model:
+         llm = Llama(
+             model_path=f"models/{model}",
+             flash_attn=True,
+             n_gpu_layers=81,
+             n_batch=1024,
+             n_ctx=8192,
+         )
+         llm_model = model
+
+     provider = LlamaCppPythonProvider(llm)
+
+     agent = LlamaCppAgent(
+         provider,
+         system_prompt=f"{system_message}",
+         predefined_messages_formatter_type=chat_template,
+         debug_output=True
+     )
+
+     settings = provider.get_provider_default_settings()
+     settings.temperature = 0.1
+     settings.top_p = 0.3
+     settings.do_sample = True
+     settings.num_beams = 1
+     settings.repetition_penalty = 1
+     settings.max_new_tokens = 512
+     settings.min_new_tokens = 1
+     settings.stream = True
+
+     messages = BasicChatHistory()
+
+     for msn in history:
+         user = {
+             'role': Roles.user,
+             'content': msn[0]
+         }
+         assistant = {
+             'role': Roles.assistant,
+             'content': msn[1]
+         }
+         messages.add_message(user)
+         messages.add_message(assistant)
+
+     stream = agent.get_chat_response(
+         message,
+         llm_sampling_settings=settings,
+         chat_history=messages,
+         returns_streaming_generator=True,
+         print_output=False
+     )
+
+     outputs = ""
+     for output in stream:
+         outputs += output
+         yield outputs
+
+ description = """<p align="center">Defaults to Sakura-32B-Qwen2beta</p>
+ <p><center>
+ <a href="https://huggingface.co/SakuraLLM/Sakura-32B-Qwen2beta-v0.10pre1-GGUF" target="_blank">[Sakura-32B-Qwen2beta Model]</a>
+ </center></p>
+ """
+
+ demo = gr.ChatInterface(
+     respond,
+     retry_btn="Retry",
+     undo_btn="Undo",
+     clear_btn="Clear",
+     submit_btn="Send",
+     title="Chat with Sakura 32B using llama.cpp",
+     description=description,
+     chatbot=gr.Chatbot(
+         scale=1,
+         likeable=False,
+         show_copy_button=True
+     )
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
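
For anyone reviewing this commit who wants to sanity-check the new code path without the Gradio UI, here is a minimal sketch (not part of the commit) of driving the `respond` generator directly. It assumes the script is run next to app.py with llama-cpp-python and llama-cpp-agent installed and a GPU available; note that importing `app` executes the module top level, so it will run the `hf_hub_download` call if the GGUF file is not already in ./models.

# Hypothetical smoke test for respond(); assumes the model file is
# already present under ./models and a CUDA GPU is available.
from app import respond  # importing app also builds the Gradio demo object

history = []  # list of (user, assistant) tuples from earlier turns
final = ""
for partial in respond("夜空に星が瞬いていた。", history):
    final = partial  # respond() yields the accumulated output on each step

print(final)  # the finished Simplified-Chinese translation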