Spaces:

fistyyy
/

Music_LMMs

Running

File size: 4,963 Bytes

import gradio as gr
from PIL import Image
from io import BytesIO
import openai
import os
from dotenv import load_dotenv
from image_processor import ImageProcessor
from evaluation_processor import EvaluationProcessor

# Load variables from a local .env file (if present) into the environment
# before any key is read below.
load_dotenv()

# OpenAI client used for the chat-completion request in process_input.
client = openai.OpenAI(
    api_key=os.getenv("OPENAI_API_KEY")
)

# Also set the module-level OpenAI key; kept in case other code still reads
# the legacy global (not verifiable from this file).
openai.api_key = os.getenv("OPENAI_API_KEY")
engine = "gpt-4o-mini"

# Music API key.
# SECURITY: a credential committed to source is visible to anyone who can read
# the repository. Prefer supplying MUSIC_API_KEY through the environment (or
# .env); the embedded value is kept only as a backward-compatible fallback and
# should be rotated and removed.
api_key = os.getenv("MUSIC_API_KEY") or 'ddc85b14-bd83-4757-9bc4-8a11194da536'
image_processor = ImageProcessor(api_key)
evaluation_processor = EvaluationProcessor(api_key)

# Processing function and its private helpers
def _upload_path(upload):
    """Return the filesystem path of an uploaded file.

    Accepts either a plain path string or an object that carries the path in
    a ``name`` attribute (such as a temporary-file wrapper), so the caller
    works regardless of which form the UI layer hands over.
    """
    return getattr(upload, "name", upload)


def _evaluation_prompt(title, result, heading):
    """Build the prompt fragment asking the model to assess a performance.

    ``heading`` is the first line of the criteria list. The audio and video
    variants differ only in that line, and both are preserved verbatim.
    """
    # Continuation lines are indented by 12 spaces, matching the prompt text
    # the model has been receiving so far.
    separator = "\n" + " " * 12
    criteria = separator.join([
        heading,
        '"eva_all":综合得分',
        '"eva_completion":完整性',
        '"eva_note":按键',
        '"eva_stability":稳定性',
        '"eva_tempo_sync":节奏',
        f"几个方面评价一下下面这首曲子演奏的结果, 不用提及键的英文，使用中文，曲子为 {result}",
    ])
    return f"如果有曲名{title},请你根据这首歌的名字作者，并且" + criteria


def process_input(text=None, images=None, audio=None, video=None):
    """Answer a music question from any mix of text, score images, audio and video.

    Args:
        text: Optional free-form question.
        images: Optional list of uploaded score images (path strings or
            objects with a ``name`` path attribute).
        audio: Optional uploaded audio file (same accepted forms).
        video: Optional uploaded video file (same accepted forms).

    Returns:
        A ``(answer_text, html)`` tuple. ``html`` is an ``<iframe>`` pointing
        at the evaluation's ``result_path`` when one was produced, otherwise
        an empty string. If any stage fails, ``(error_message, None)`` is
        returned instead of raising.
    """
    from html import escape  # local import: only needed for the iframe markup

    # System prompt for the GPT request.
    system = "1.你是一个音乐专家，只能回答音乐知识，回复的内容为普通文本格式,不用任何markdown符号如加粗等。如果提供的乐谱是abc记谱法,则回复时不要用abc记谱法,需要转换为传统的普通记谱法使用专业词汇进行回答问题\n2.你将根据下面指令回答问题，但是不能违反第一条指令,也不能在回复中提及。"
    messages = [{"role": "system", "content": system}]
    prompt = ""
    # Must exist even when no audio/video is supplied; it is read after the
    # GPT call to decide whether to render an iframe.
    result_path = ""

    if text:
        prompt += f"\nText input: {text}"

    if images:
        try:
            # Re-encode every upload as PNG bytes for the image processor.
            image_bytes_list = []
            for upload in images:
                with Image.open(_upload_path(upload)) as img:
                    buffer = BytesIO()
                    img.save(buffer, format="PNG")
                image_bytes_list.append(buffer.getvalue())

            processed_image_result = image_processor.process_images(image_bytes_list)
            prompt += f"\n乐谱的内容如下,请你根据曲子的曲风回答问题: {processed_image_result}"
        except Exception as e:
            return f"Error processing image: {e}", None

    if audio:
        try:
            # process_evaluation is used here as returning (result, title),
            # with result offering a dict-style .get().
            result, title = evaluation_processor.process_evaluation(
                _upload_path(audio), is_video=False
            )
            prompt += _evaluation_prompt(title, result, "1. 请你从")
            result_path = result.get('result_path', '')
        except Exception as e:
            return f"Error processing audio: {e}", None

    if video:
        try:
            result, title = evaluation_processor.process_evaluation(
                _upload_path(video), is_video=True
            )
            prompt += _evaluation_prompt(title, result, "1.请你从")
            # When both audio and video are given, the video report wins.
            result_path = result.get('result_path', '')
        except Exception as e:
            return f"Error processing video: {e}", None

    # Send the assembled prompt to the GPT API.
    try:
        messages.append({"role": "user", "content": prompt})
        response = client.chat.completions.create(
            model=engine,
            messages=messages,
            temperature=0.2,
            max_tokens=4096,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None
        )
        results = response.choices[0].message.content
    except Exception as e:
        return f"Error: {e}", None

    if result_path:
        # Escape the URL so it cannot break out of the src attribute.
        safe_src = escape(str(result_path), quote=True)
        html_output = f"""<iframe src="{safe_src}" width="100%" height="600"></iframe>"""
    else:
        html_output = ""
    return results, html_output

# Build the Gradio interface: one text box plus three file pickers in,
# a text answer plus an HTML pane out.
_input_components = [
    # Free-form text question
    gr.Textbox(
        label="Input Text",
        placeholder="我是音乐多模态大模型，您可以上传需要分析的曲谱，音频和视频",
        lines=2,
    ),
    # Score images (multiple files allowed)
    gr.File(label="Input Images", file_count="multiple", type="filepath"),
    # Audio upload
    gr.File(label="Input Audio", type="filepath"),
    # Video upload
    gr.File(label="Input Video", type="filepath"),
]

_output_components = [
    gr.Textbox(label="Output Text"),
    gr.HTML(label="Webpage"),
]

iface = gr.Interface(
    fn=process_input,
    inputs=_input_components,
    outputs=_output_components,
    live=False,
)

# Start the Gradio app
iface.launch()