import gradio as gr
from PIL import Image
from io import BytesIO
import openai
import os
from dotenv import load_dotenv
from image_processor import ImageProcessor
from evaluation_processor import EvaluationProcessor

load_dotenv()

client = openai.OpenAI(
    api_key=os.getenv("OPENAI_API_KEY")
)

# Configure the OpenAI API key (also set on the module for legacy-style access).
openai.api_key = os.getenv("OPENAI_API_KEY")
engine = "gpt-4o-mini"

# Configure the Music API key.
# NOTE(review): a credential is committed in source. MUSIC_API_KEY overrides
# it when set; the literal is kept only as a backward-compatible fallback and
# should be rotated and removed.
api_key = os.getenv("MUSIC_API_KEY", 'ddc85b14-bd83-4757-9bc4-8a11194da536')

image_processor = ImageProcessor(api_key)
evaluation_processor = EvaluationProcessor(api_key)

# Shared tail of the performance-evaluation request sent to the model.
_EVALUATION_REQUEST = (
    '请你从 "eva_all":综合得分 "eva_completion":完整性 "eva_note":按键 '
    '"eva_stability":稳定性 "eva_tempo_sync":节奏 几个方面评价一下下面这首曲子演奏的结果, '
    '不用提及键的英文,使用中文,曲子为 '
)


def _as_path(upload):
    """Return the path of an upload given as a path or as an object with ``.name``."""
    return getattr(upload, "name", upload)


def _encode_png(upload):
    """Load one uploaded image and return its PNG-encoded bytes."""
    # The context manager closes the underlying file once it is re-encoded.
    with Image.open(_as_path(upload)) as img:
        buffer = BytesIO()
        img.save(buffer, format="PNG")
    return buffer.getvalue()


def _evaluation_prompt(media, is_video, item_prefix):
    """Evaluate a recording and return ``(prompt_fragment, result_path)``.

    ``item_prefix`` is the literal list marker placed before the request; the
    audio and video branches historically used slightly different whitespace
    there, and both forms are preserved by the callers.
    """
    result, title = evaluation_processor.process_evaluation(
        _as_path(media), is_video=is_video
    )
    fragment = (
        f"如果有曲名{title},请你根据这首歌的名字作者,并且"
        f"{item_prefix}{_EVALUATION_REQUEST}{result}"
    )
    return fragment, result.get('result_path', '')


# Request handler wired into the Gradio interface.
def process_input(text=None, images=None, audio=None, video=None):
    """Build a prompt from the supplied inputs and ask the chat model about it.

    Args:
        text: Optional free-form question from the user.
        images: Optional iterable of uploaded sheet-music images (paths or
            objects exposing ``.name``).
        audio: Optional uploaded audio file (path or object exposing ``.name``).
        video: Optional uploaded video file (path or object exposing ``.name``).

    Returns:
        A ``(text, html)`` tuple. On failure ``text`` is an error message and
        ``html`` is ``None``.
    """
    # System instructions for the GPT request.
    system = "1.你是一个音乐专家,只能回答音乐知识,回复的内容为普通文本格式,不用任何markdown符号如加粗等。如果提供的乐谱是abc记谱法,则回复时不要用abc记谱法,需要转换为传统的普通记谱法使用专业词汇进行回答问题\n2.你将根据下面指令回答问题,但是不能违反第一条指令,也不能在回复中提及。"
    messages = [{"role": "system", "content": system}]
    prompt = ""

    if text:
        prompt += f"\nText input: {text}"

    result_path = None

    if images:
        # Decode and recognise the images inside one try block so that an
        # unreadable file is reported like any other processing failure.
        try:
            image_bytes_list = [_encode_png(image) for image in images]
            processed_image_result = image_processor.process_images(image_bytes_list)
            prompt += f"\n乐谱的内容如下,请你根据曲子的曲风回答问题: {processed_image_result}"
        except Exception as e:
            return f"Error processing image: {e}", None

    if audio:
        try:
            # Evaluate the audio with EvaluationProcessor.
            fragment, result_path = _evaluation_prompt(audio, False, "1. \n")
            prompt += fragment
        except Exception as e:
            return f"Error processing audio: {e}", None

    if video:
        try:
            # Evaluate the video with EvaluationProcessor; when both audio and
            # video are given, the video's result_path wins (as before).
            fragment, result_path = _evaluation_prompt(video, True, "1.")
            prompt += fragment
        except Exception as e:
            return f"Error processing video: {e}", None

    # Send the assembled prompt to the GPT API.
    try:
        messages.append({"role": "user", "content": prompt})
        response = client.chat.completions.create(
            model=engine,
            messages=messages,
            temperature=0.2,
            max_tokens=4096,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None
        )
        results = response.choices[0].message.content
    except Exception as e:
        return f"Error: {e}", None

    # NOTE(review): the HTML template is empty in this source, so the HTML
    # output is always "". Presumably markup embedding result_path was
    # intended here -- confirm and restore.
    html_output = f"""""" if result_path else ""

    return results, html_output


# Build the Gradio interface.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="Input Text", placeholder="我是音乐多模态大模型,您可以上传需要分析的曲谱,音频和视频", lines=2),  # text input
        gr.File(label="Input Images", file_count="multiple", type="filepath"),  # multi-file upload
        gr.File(label="Input Audio, mp3", type="filepath"),  # audio upload
        gr.File(label="Input Video, mp4", type="filepath")  # video upload
    ],
    outputs=[
        gr.Textbox(label="Output Text"),
        gr.HTML(label="Webpage")
    ],
    live=False,
)

# Launch the Gradio app.
iface.launch()