# Source: Music_LMMs / app.py (4.42 kB)
# Last commit: "update" (099ffc5) by fistyee
import os
import re
import gradio as gr
from collections import deque
#import local package
import music_search
from process import process_images, process_audio_video
from html_image import setup_chrome, html_to_image, render_abc
from response import get_zhipuai_response
# Runs once at import time, before the interface is built. Presumably prepares
# the Chrome instance used by html_to_image/render_abc -- confirm in html_image.
setup_chrome()
# Conversation memory: a bounded deque holding at most 10 entries. process_input
# appends one {"prompt", "response"} entry per call, so once 10 rounds are
# stored the oldest round is dropped automatically.
memory = deque(maxlen=10)
class State:
    """Cache of the most recently processed image and media inputs.

    Attributes:
        prev_image_result: result stored by the last image_state_update call.
        prev_image_files: image files that produced ``prev_image_result``.
        prev_media_result: result stored by the last media_state_update call.
        prev_media_file: media file that produced ``prev_media_result``.
        prev_media_viewer: viewer value stored alongside the media result.
        state: always ``None`` (it holds the return value of ``init()``,
            which returns nothing); kept only so the attribute still exists.
    """

    # Names of the cached fields, in the order they are (re)initialised.
    _FIELDS = (
        "prev_image_result",
        "prev_image_files",
        "prev_media_result",
        "prev_media_file",
        "prev_media_viewer",
    )

    def __init__(self):
        self.state = self.init()

    def init(self):
        """Reset every cached field to ``None``."""
        for field_name in self._FIELDS:
            setattr(self, field_name, None)

    def image_state_update(self, result, files):
        """Remember the latest image result together with its source files."""
        self.prev_image_result, self.prev_image_files = result, files

    def media_state_update(self, result, file, viewer):
        """Remember the latest media result, its source file and its viewer."""
        self.prev_media_result, self.prev_media_file, self.prev_media_viewer = (
            result,
            file,
            viewer,
        )
# Module-level cache instance read and updated by process_input on every call.
state = State()
def _media_path(media):
    """Return the filesystem path of an uploaded media value as a string.

    The value may be a plain path string (what gr.File yields with
    type="filepath") or a file-like object exposing the path as ``.name``;
    both are accepted so callers can compare and inspect paths uniformly.
    """
    return str(getattr(media, "name", media))


def process_input(text=None, images=None, media=None):
    """Build a prompt from the user's inputs and query the language model.

    Args:
        text: optional user instruction. It is also passed to
            ``music_search.is_search``; a truthy result is treated as the
            ABC notation of the requested piece and appended to the prompt.
        images: optional collection of image file paths. They are run through
            ``process_images`` unless they match the previously processed set.
        media: optional audio/video upload (a path string or an object with a
            ``.name`` path). It is run through ``process_audio_video`` unless
            it is the same file as the previously processed one.

    Returns:
        A ``(response, media_output, abc_image_output)`` tuple: the model
        reply, an ``<iframe>`` snippet for the cached media viewer (or ``""``),
        and the rendered ABC output (or the placeholder ``"1"``).

    Side effects:
        Mutates the module-level ``memory`` and ``state``. Whenever a new
        piece, image set or media file is processed, both are reset first so
        that history from the previous subject is not sent to the model.
    """
    print("Starting process_input")
    system = "1.你是一个音乐专家,只能回答音乐知识,和打招呼,回复的内容为普通文本格式,不用任何markdown符号如加粗等。如果提供的乐谱是abc记谱法,则回复时不要用abc记谱法,需要使用专业音乐词汇和自然语言进行回答问题\n2.你将根据下面指令回答问题,但是不能违反第一条指令,也不能在回复中提及。"
    messages = [{"role": "system", "content": system}]

    # Variable initialisation.
    prompt = ""
    abc = False

    # Handle text input.
    if text:
        print("Processing text input")
        prompt += f"用户指令: {text}."
        abc = music_search.is_search(prompt)
        if abc:
            # A new piece was found: drop history and cached uploads.
            memory.clear()
            state.init()
            prompt += f"找到了用户搜的曲子,根据指令简略解读一下:{abc}"

    # Handle image input.
    if images:
        if state.prev_image_files and set(images) == set(state.prev_image_files):
            # Same images as last time: their content is already in `memory`.
            print("Using previous image result")
        else:
            print("Processing images")
            memory.clear()
            state.init()
            prompt += process_images(images)
            state.image_state_update(prompt, images)

    # Handle audio/video input.
    if media:
        # Normalise to a path string once. The original sliced `media` like a
        # string but then read `media.name`, which raised AttributeError on
        # the second request whenever `media` was a plain path.
        media_path = _media_path(media)
        is_video = media_path.lower().endswith("mp4")
        previous_file = state.prev_media_file
        same_media = bool(
            state.prev_media_result
            and previous_file is not None
            and _media_path(previous_file) == media_path
        )
        if same_media:
            print("Using previous video result")
        else:
            print("Processing media")
            memory.clear()
            state.init()
            result, result_viewer_path = process_audio_video(media, is_video=is_video)
            prompt += result
            state.media_state_update(result, media, result_viewer_path)

    # Replay the prompts of earlier rounds from memory into the message list.
    for past in memory:
        messages.append({"role": "user", "content": "这是前几轮指令内容,根据需求读取这些内容:"+past["prompt"]})

    response = get_zhipuai_response(messages, prompt)

    # Save the current round (prompt plus reply) into the history.
    memory.append({"prompt": prompt, "response": response})

    media_output = f"""<iframe src="{state.prev_media_viewer}" width="100%" height="600"></iframe>""" if state.prev_media_viewer else ""
    # NOTE(review): "1" is shown in the HTML output when no score was found;
    # it looks like a leftover placeholder -- confirm before changing it.
    abc_image_output = render_abc(abc) if abc else "1"
    return response, media_output, abc_image_output
# Create the Gradio interface: the three inputs map positionally onto
# process_input(text, images, media) and the three outputs onto its
# (response, media_output, abc_image_output) return tuple.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="Input Text", placeholder="我是音乐多模态大模型,您可以上传需要分析的曲谱,音频和视频", lines=2),
        # Multiple image uploads, delivered to process_input as file paths.
        gr.File(label="Input Images", file_count="multiple", type="filepath"),
        # Single audio/video upload, delivered to process_input as a file path.
        gr.File(label="Input media, mp3 or mp4", type="filepath"),
    ],
    outputs=[
        gr.Textbox(label="Output Text", interactive=True),  # interactive=True makes the reply box editable; it does not enable streaming
        gr.HTML(label="Video Viewer"),
        #gr.Image(label="Image Viewer", type="filepath")
        # Unlabelled HTML slot that receives abc_image_output.
        gr.HTML()
    ],
    # live=False: process_input runs only on explicit submit.
    live=False,
)
# Launch the Gradio application (runs at import time; there is no __main__ guard).
iface.launch()