File size: 6,859 Bytes
d0c2b7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import gradio as gr
from PIL import Image
from io import BytesIO
import openai
import os
from dotenv import load_dotenv
from image_processor import ImageProcessor
from evaluation_processor import EvaluationProcessor
from zhipuai import ZhipuAI
# Load environment variables
load_dotenv()

# Initialize OpenAI client
openai.api_key = os.getenv("OPENAI_API_KEY")
engine = "gpt-4o-mini"

# Initialize image and evaluation processors
api_key = 'ddc85b14-bd83-4757-9bc4-8a11194da536'
image_processor = ImageProcessor(api_key)
evaluation_processor = EvaluationProcessor(api_key)

def process_input(text=None, images=None, audio=None, video=None):
    print("Starting process_input")
    system_prompt = (
        "1.你是一个音乐专家,只能回答音乐知识..."
    )
    
    messages = [{"role": "system", "content": system_prompt}]
    prompt = ""

    if text:
        print("Processing text input")
        prompt += f"\nText input: {text}"

    result_path = None

    if images:
        print("Processing images")
        prompt += process_images(images)

    if audio:
        print("Processing audio")
        result, title = process_audio(audio)
        prompt += result
        result_path = title.get('result_path', '')

    if video:
        print("Processing video")
        result, title = process_video(video)
        prompt += result
        result_path = title.get('result_path', '')

    #print("Prepared prompt for ZhipuAI:", prompt)
    #print("Preparing to call get_zhipuai_response")
    return get_zhipuai_response(messages, prompt), result_path


def process_images(images):
    image_bytes_list = []
    for image in images:
        img = Image.open(image.name)
        image_bytes = BytesIO()
        img.save(image_bytes, format="PNG")
        image_bytes.seek(0)
        image_bytes_list.append(image_bytes.getvalue())

    try:
        processed_image_result = image_processor.process_images(image_bytes_list)
        return f"\n乐谱的内容如下,请你根据曲子的曲风回答问题: {processed_image_result}"
    except Exception as e:
        return f"Error processing image: {e}"

def process_audio(audio):
    audio_path = audio.name
    try:
        result, title = evaluation_processor.process_evaluation(audio_path, is_video=False)
        prompt = (
            f'''如果有曲名{title},请你根据这首歌的名字作者,并且'''
            f'''1. 请你从
            "eva_all":综合得分
            "eva_completion":完整性
            "eva_note":按键
            "eva_stability":稳定性
            "eva_tempo_sync":节奏
            几个方面评价一下下面这首曲子演奏的结果, 不用提及键的英文,只使用中文,曲子为 {result}'''
        )
        return prompt, title
    except Exception as e:
        return f"Error processing audio: {e}", None

def process_video(video):
    video_path = video.name
    try:
        result, title = evaluation_processor.process_evaluation(video_path, is_video=True)
        prompt = (
            f'''如果有曲名{title},请你根据这首歌的名字作者,并且'''
            f'''1.请你从
            "eva_all":综合得分
            "eva_completion":完整性
            "eva_note":按键
            "eva_stability":稳定性
            "eva_tempo_sync":节奏
            几个方面评价一下下面这首曲子演奏的结果, 不用提及键的英文,只使用中文,曲子为 {result}'''
        )
        return prompt, title
    except Exception as e:
        return f"Error processing video: {e}", None

def get_gpt_response(messages, prompt):
    messages.append({"role": "user", "content": prompt})
    response_text = ""

    # Use OpenAI API for streaming response
    try:
        for chunk in openai.ChatCompletion.create(
            model=engine,
            messages=messages,
            temperature=0.2,
            max_tokens=4096,
            top_p=0.95,
            frequency_penalty=0,
            presence_penalty=0,
            stream=True  # Enable streaming
        ):
            if 'content' in chunk['choices'][0]['delta']:
                response_text += chunk['choices'][0]['delta']['content']
                yield response_text  # Yield response incrementally
    except Exception as e:
        yield f"Error: {e}"


def get_zhipuai_response_stream(messages, prompt):
    print("Inside get_zhipuai_response")
    client = ZhipuAI(api_key="423ca4c1f712621a4a1740bb6008673b.81aM7DNo2Ssn8FPA")
    messages.append({"role": "user", "content": prompt})
    response_text = ""

    # Use ZhipuAI API for streaming response
    try:
        response = client.chat.completions.create(
            model="glm-4-flash",
            messages=messages,
            stream=True  # Enable streaming
        )
        print("Response received from ZhipuAI")
        print(response)
        for chunk in response:
            print(f"Chunk received: {chunk}")  # Log each chunk
            response_text = chunk.choices[0].delta.content
            print(response_text)
            yield response_text  # Yield response incrementally
    except Exception as e:
        print(f"Error in get_zhipuai_response_stream: {e}")
        yield f"Error: {e}"

def get_zhipuai_response(messages, prompt):
    print("Inside get_zhipuai_response")  # Confirming entry into the function
    client = ZhipuAI(api_key="423ca4c1f712621a4a1740bb6008673b.81aM7DNo2Ssn8FPA")
    
    messages.append({"role": "user", "content": prompt})
    print("Messages prepared:", messages)  # Log messages
    
    response_text = ""
    
    # Non-streaming test
    try:
        print("Calling ZhipuAI API...")  # Log before API call
        response = client.chat.completions.create(
            model="glm-4-flash",
            messages=messages,
            stream=False  # Disable streaming for this test
        )
        print("Response received from ZhipuAI")  # Log response retrieval
        response_text = response.choices[0].message.content
        return response_text  # Return the entire response

    except Exception as e:
        print(f"Error in get_zhipuai_response: {e}")  # More informative error message
        return f"Error: {e}"


# Create Gradio interface
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="Input Text", placeholder="我是音乐多模态大模型,您可以上传需要分析的曲谱,音频和视频", lines=2),
        gr.File(label="Input Images", file_count="multiple", type="filepath"),
        gr.File(label="Input Audio, mp3", type="filepath"),
        gr.File(label="Input Video, mp4", type="filepath")
    ],
    outputs=[
        gr.Textbox(label="Output Text", interactive=True),  # Enable streaming in the output
        gr.HTML(label="Webpage")
    ],
    live=False,
)

# Launch Gradio application
iface.launch()