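# NOTE: the original first lines ("Spaces: Running Running") appear to be a
# status banner captured from the hosting page, not part of the program.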
import html
import os
import re
from collections import deque

import gradio as gr

# Local packages
import music_search
from process import process_images, process_audio_video
from html_image import setup_chrome, html_to_image, render_abc
from response import get_zhipuai_response
# One-time browser setup from html_image; presumably required before
# render_abc / html_to_image can be used -- confirm in html_image.
setup_chrome()

# Conversation memory: a bounded deque. process_input appends exactly one
# entry per round, so this keeps the 10 most recent rounds and silently drops
# older ones.
memory = deque(maxlen=10)
class State:
    """Cache of the most recently processed image and media inputs.

    The attributes let the request handler recognise a re-submitted upload
    and skip processing it again:

    * ``prev_image_result`` / ``prev_image_files`` -- last image analysis
      result and the files it was computed from.
    * ``prev_media_result`` / ``prev_media_file`` / ``prev_media_viewer`` --
      last audio/video analysis result, the file it was computed from, and
      the viewer location produced alongside it.
    """

    def __init__(self):
        self.init()
        # Legacy attribute: it used to receive the return value of init(),
        # which is always None. Kept (as an explicit None) so any existing
        # reader of ``.state`` keeps working.
        self.state = None

    def init(self):
        """Reset every cached value to ``None``."""
        self.prev_image_result = None
        self.prev_image_files = None
        self.prev_media_result = None
        self.prev_media_file = None
        self.prev_media_viewer = None

    def image_state_update(self, result, files):
        """Remember the latest image result and the files it belongs to."""
        self.prev_image_result = result
        self.prev_image_files = files

    def media_state_update(self, result, file, viewer):
        """Remember the latest media result, its source file and its viewer."""
        self.prev_media_result = result
        self.prev_media_file = file
        self.prev_media_viewer = viewer


# Module-level singleton shared by all requests.
state = State()
def _upload_path(upload):
    """Return the filesystem path of an upload as a string.

    The file inputs are declared with ``type="filepath"``, so uploads normally
    arrive as plain ``str`` paths; objects exposing ``.name`` (file-like
    wrappers) are accepted as well.
    """
    return str(getattr(upload, "name", upload))


def process_input(text=None, images=None, media=None):
    """Answer one chat round from optional text, image and audio/video input.

    Args:
        text: The user's instruction, or ``None``/empty.
        images: List of image file paths, or ``None``/empty.
        media: Path of one audio/video file, or ``None``/empty.

    Returns:
        A ``(response, media_output, abc_image_output)`` tuple: the model's
        reply, an ``<iframe>`` HTML snippet for the media viewer (``""`` when
        there is none), and the value returned by ``render_abc`` for a found
        score (``None`` when no score was found).

    Side effects:
        Appends the round to the module-level ``memory`` and updates the
        module-level ``state`` cache. A successful music search, a new set of
        images or a new media file starts a fresh conversation: ``memory`` and
        ``state`` are reset exactly once, and every upload supplied in that
        request is then (re)processed so its content is part of the new
        conversation.
    """
    print("Starting process_input")
    system = "1.你是一个音乐专家,只能回答音乐知识,和打招呼,回复的内容为普通文本格式,不用任何markdown符号如加粗等。如果提供的乐谱是abc记谱法,则回复时不要用abc记谱法,需要使用专业音乐词汇和自然语言进行回答问题\n2.你将根据下面指令回答问题,但是不能违反第一条指令,也不能在回复中提及。"
    messages = [{"role": "system", "content": system}]

    prompt = ""
    abc = None

    # Text input: also check whether the instruction is a score search.
    if text:
        print("Processing text input")
        prompt += f"用户指令: {text}."
        abc = music_search.is_search(prompt)

    # Decide up front which uploads are unchanged since the previous round.
    images_cached = bool(
        images
        and state.prev_image_files
        and set(images) == set(state.prev_image_files)
    )
    media_cached = bool(
        media
        and state.prev_media_result
        and state.prev_media_file is not None
        and _upload_path(media) == _upload_path(state.prev_media_file)
    )

    # Reset once per request. Resetting inside each branch made a later branch
    # wipe the cache an earlier branch had just recorded, so a request carrying
    # both images and media could never reuse results or keep its history.
    starts_new_conversation = (
        bool(abc)
        or (bool(images) and not images_cached)
        or (bool(media) and not media_cached)
    )
    if starts_new_conversation:
        memory.clear()
        state.init()
        # The history that carried earlier results is gone, so everything
        # supplied now has to be processed again.
        images_cached = False
        media_cached = False

    if abc:
        prompt += f"找到了用户搜的曲子,根据指令简略解读一下:{abc}"

    # Image input.
    if images:
        if images_cached:
            print("Using previous image result")
        else:
            print("Processing images")
            image_result = process_images(images)
            prompt += image_result
            # Store only the image result, mirroring the media branch.
            state.image_state_update(image_result, images)

    # Audio/video input.
    if media:
        if media_cached:
            print("Using previous video result")
        else:
            print("Processing media")
            # Extension check is case-insensitive and requires the dot.
            extension = os.path.splitext(_upload_path(media))[1].lower()
            is_video = extension == ".mp4"
            result, result_viewer_path = process_audio_video(media, is_video=is_video)
            prompt += result
            state.media_state_update(result, media, result_viewer_path)

    # Replay the prompts of earlier rounds as context for the model.
    for past in memory:
        messages.append({"role": "user", "content": "这是前几轮指令内容,根据需求读取这些内容:"+past["prompt"]})

    response = get_zhipuai_response(messages, prompt)
    memory.append({"prompt": prompt, "response": response})

    if state.prev_media_viewer:
        # Escape the path so quotes or angle brackets in it cannot break out of
        # the src attribute.
        viewer_src = html.escape(str(state.prev_media_viewer), quote=True)
        media_output = f"""<iframe src="{viewer_src}" width="100%" height="600"></iframe>"""
    else:
        media_output = ""

    # None leaves the image output empty; the former placeholder "1" was not a
    # valid file path for a filepath-typed image component.
    abc_image_output = render_abc(abc) if abc else None

    return response, media_output, abc_image_output
# Create Gradio interface: the three inputs are passed to process_input in
# order (text, images, media) and its three return values fill the three
# outputs in order.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(
            label="Input Text",
            placeholder="我是音乐多模态大模型,您可以上传需要分析的曲谱,音频和视频",
            lines=2,
        ),
        gr.File(label="Input Images", file_count="multiple", type="filepath"),
        gr.File(label="Input media, mp3 or mp4", type="filepath"),
    ],
    outputs=[
        # interactive=True makes the reply box editable by the user; it does
        # not enable streaming.
        gr.Textbox(label="Output Text", interactive=True),
        gr.HTML(label="Video Viewer"),
        gr.Image(label="Image Viewer", type="filepath"),
    ],
    live=False,
)

# Launch Gradio application
iface.launch()