import os

import cv2
import google.generativeai as genai
import gradio as gr
import mediapipe as mp
import numpy as np
import spaces
import suno
import torch
from diffusers import DiffusionPipeline
from PIL import Image
from tensorflow.keras.models import load_model
from torchvision.transforms.functional import to_tensor, to_pil_image

from model import Generator
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Read the Gemini API key from the environment (set GEMINI_API_KEY on the host)
# instead of hardcoding a secret in the source.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
GEMINI_MODEL = 'gemini-1.5-flash'
gemini_model = genai.GenerativeModel(GEMINI_MODEL)

# Generator weights for webtoon-style face translation (kept on CPU).
webtoon_model = Generator()
webtoon_model.load_state_dict(torch.load('weights/face_paint_512_v2.pt', map_location="cpu"))
webtoon_model.to('cpu').eval()

# Keras model that predicts a facial attractiveness score on a 1-5 scale.
facescore_model = load_model('facescore.h5', compile=False)

# SDXL base pipeline with the musinsaigo fashion LoRA for outfit image generation.
model_id = "aldente0630/musinsaigo-3.0"
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe = pipe.to(device)
pipe.load_lora_weights(model_id)

mp_face_detection = mp.solutions.face_detection
def detect_and_crop_face(image):
    """Detect the most prominent face with MediaPipe and return it as a PIL crop, or None."""
    with mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5) as face_detection:
        image_np = np.array(image.convert("RGB"))  # MediaPipe expects an RGB array
        results = face_detection.process(image_np)
        if not results.detections:
            return None
        detection = results.detections[0]
        bbox = detection.location_data.relative_bounding_box
        ih, iw, _ = image_np.shape
        # Relative coordinates can fall slightly outside the frame; clamp to the image bounds.
        xmin = max(int(bbox.xmin * iw), 0)
        ymin = max(int(bbox.ymin * ih), 0)
        xmax = min(xmin + int(bbox.width * iw), iw)
        ymax = min(ymin + int(bbox.height * ih), ih)
        return image.crop((xmin, ymin, xmax, ymax))

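# Plain-text chat: forward the user's message straight to Gemini.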
def generate_chat_response(message, gemini_model):
    response = gemini_model.generate_content(message)
    return response.text

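# Detailed image analysis: Gemini receives the image plus a structured Korean prompt.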
def analyze_image(image, gemini_model):
    try:
        # Pass the image to Gemini together with the analysis prompt and generate a response.
        prompt = """
        ์ด ์ด๋ฏธ์ง€์— ๋Œ€ํ•ด ์ž์„ธํžˆ ๋ถ„์„ํ•ด์ฃผ์„ธ์š”. ๋‹ค์Œ ์ •๋ณด๋ฅผ ํฌํ•จํ•ด์ฃผ์„ธ์š”:
        1. ์ด๋ฏธ์ง€์—์„œ ๋ณด์ด๋Š” ์ฃผ์š” ๊ฐ์ฒด๋‚˜ ์‚ฌ๋žŒ๋“ค
        2. ๋ฐฐ๊ฒฝ์ด๋‚˜ ์žฅ์†Œ์— ๋Œ€ํ•œ ์„ค๋ช…
        3. ์ด๋ฏธ์ง€์˜ ์ „์ฒด์ ์ธ ๋ถ„์œ„๊ธฐ๋‚˜ ๋Š๋‚Œ
        4. ์ด๋ฏธ์ง€์—์„œ ์ฝ์„ ์ˆ˜ ์žˆ๋Š” ํ…์ŠคํŠธ (์žˆ๋Š” ๊ฒฝ์šฐ)
        5. ์ด๋ฏธ์ง€์˜ ์ƒ‰์ƒ์ด๋‚˜ ๊ตฌ๋„์— ๋Œ€ํ•œ ๊ฐ„๋‹จํ•œ ์„ค๋ช…
        6. ์ด๋ฏธ์ง€๊ฐ€ ์ „๋‹ฌํ•˜๋ ค๋Š” ๋ฉ”์‹œ์ง€๋‚˜ ์˜๋ฏธ (์žˆ๋‹ค๊ณ  ์ƒ๊ฐ๋˜๋Š” ๊ฒฝ์šฐ)

        ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ์ œ๊ณตํ•ด์ฃผ์„ธ์š”.
        """
        response = gemini_model.generate_content([prompt, image])
        return response.text if response else "์ด๋ฏธ์ง€ ๋ถ„์„์„ ์ˆ˜ํ–‰ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
    except Exception as e:
        return f"์ด๋ฏธ์ง€ ๋ถ„์„ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"

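# Face-score pipeline: crop the face, run the Gemini analysis, then the Keras scorer.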
def process_facescore(image, facescore_model, gemini_model):
    face = detect_and_crop_face(image)
    if face is None:
        return "์–ผ๊ตด์ด ๊ฐ์ง€๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ๋‹ค๋ฅธ ์ด๋ฏธ์ง€๋ฅผ ์‹œ๋„ํ•ด ์ฃผ์„ธ์š”."

    analysis = analyze_image(image, gemini_model)

    # Preprocess the face crop to the model's expected 350x350 RGB input.
    face_np = np.array(face.convert("RGB"))
    img_resized = cv2.resize(face_np, (350, 350))
    img_resized = img_resized.astype(np.float32) / 255.
    img_batch = np.expand_dims(img_resized, axis=0)

    # predict() returns a (1, 1) array; flatten it down to the scalar score.
    score = float(np.ravel(facescore_model.predict(img_batch))[0])
    message = display_result(score)

    return f'### ์ด๋ฏธ์ง€ ๋ถ„์„ ๊ฒฐ๊ณผ ###\n\n{analysis}\n\n### ์™ธ๋ชจ์ ์ˆ˜ ๊ฒฐ๊ณผ(1~5) ###\n\n{message}'


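# Music generation: summarize the face with Gemini, then hand the summary to Suno.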
def generate_music(image, gemini_model, suno_cookie):
    face = detect_and_crop_face(image)
    if face is None:
        return "์–ผ๊ตด์ด ๊ฐ์ง€๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ๋‹ค๋ฅธ ์ด๋ฏธ์ง€๋ฅผ ์‹œ๋„ํ•ด ์ฃผ์„ธ์š”."
    prompt = """
    ์ด ์ด๋ฏธ์ง€์— ๋Œ€ํ•ด ์ž์„ธํžˆ ๋ถ„์„ํ•ด์ฃผ์„ธ์š”. ๋‹ค์Œ ์ •๋ณด๋ฅผ ํฌํ•จํ•ด์ฃผ์„ธ์š”:
    1. ์„ฑ๋ณ„:
    2. ๋‚˜์ด:
    3. ํ‘œ์ •:
    ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ๊ฐ„๋žตํ•˜๊ฒŒ ์ œ๊ณตํ•ด์ฃผ์„ธ์š”.
    """
    response = gemini_model.generate_content([prompt, image])
    music_path = generate_songs(response.text, suno_cookie)
    return f"์Œ์•…์ด ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ํŒŒ์ผ ๊ฒฝ๋กœ: {music_path}"

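# Generate a song from the description via the Suno client and download the first result.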
def generate_songs(result_output, suno_cookie):
    client = suno.Suno(cookie=suno_cookie)
    songs = client.generate(
        prompt=f'{result_output}', is_custom=False, wait_audio=True
    )
    return client.download(song=songs[0])

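# Map the raw model score to one of nine tiered, playful Korean messages.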
def display_result(score):
    # Add a flat +0.3 bonus, then clamp to 5.0 so the loop below always finds a message.
    result = min(round(score, 1) + 0.3, 5.0)
    messages = [
        ("'์ž์‹ ๊ฐ ํญ๋ฐœ ์ค‘'์ž…๋‹ˆ๋‹ค! ๐Ÿ˜Ž ๋‹น์‹ ์€ ์ž์‹ ์˜ ์™ธ๋ชจ์— ๋Œ€ํ•œ ํ™•์‹ ์œผ๋กœ ๊ฐ€๋“ ์ฐจ ์žˆ์–ด์š”! %.1f์ ์ด๋ผ๋‹ˆ, ์ ์ˆ˜์™€ ์ƒ๊ด€์—†์ด ๋‹น์‹ ์˜ ๋ฉ‹์ง์€ ๋์ด ์—†๋„ค์š”! ๐Ÿคฉ ๋‹น์‹ ์˜ ์™ธ๋ชจ๋Š” ๋งˆ์น˜ ๋งˆ๋ฒ•์‚ฌ์ฒ˜๋Ÿผ ์‚ฌ๋žŒ๋“ค์„ ๋งค๋ฃŒ์‹œํ‚ค๊ณ , ๋ˆ„๊ตฌ๋‚˜ ๋‹น์‹ ์„ ๋ณด๋ฉด ๋ˆˆ์„ ๋—„ ์ˆ˜ ์—†์„ ๊ฑฐ์—์š”! ๐Ÿช„๐Ÿง™โ€โ™‚๏ธ ๋น„๊ฒฐ์ด ๋ญ๋ƒ๊ณ  ๋ฌป๋Š” ์‚ฌ๋žŒ๋“ค์—๊ฒŒ ์ž์‹ ๊ฐ์ด๋ผ๋Š” ๋งˆ๋ฒ•์˜ ์ฃผ๋ฌธ์„ ์•Œ๋ ค์ฃผ์„ธ์š”! ์˜ค๋Š˜๋„ ๋‹น์‹ ์˜ ์ž์‹ ๊ฐ์œผ๋กœ ์„ธ์ƒ์„ ๋น›๋‚ด๊ณ , ๋งˆ๋ฒ• ๊ฐ™์€ ํ•˜๋ฃจ๋ฅผ ๋ณด๋‚ด์„ธ์š”!", 1),
        ("'์™ธ๋ชจ ์Šค์Šน๋‹˜'์ž…๋‹ˆ๋‹ค. ๐Ÿ‘ฉโ€๐Ÿซ ๋‹น์‹ ์˜ ์™ธ๋ชจ ๋น„๊ฒฐ์„ ์ „์ˆ˜๋ฐ›๊ณ  ์‹ถ์–ดํ•˜๋Š” ์‚ฌ๋žŒ๋“ค์ด ์ค„์„ ์„ค ๊ฑฐ์—์š”! %.1f์ ์ด๋ผ๋Š” ์ ์ˆ˜๊ฐ€ ๋ฌด์ƒ‰ํ•  ์ •๋„๋กœ, ๋‹น์‹ ์˜ ๋น›๋‚˜๋Š” ์™ธ๋ชจ๋Š” ์‚ฌ๋žŒ๋“ค์˜ ๋ˆˆ์„ ์‚ฌ๋กœ์žก์Šต๋‹ˆ๋‹ค! โœจ ์ด์ œ ์‚ฌ๋žŒ๋“ค์€ ๋‹น์‹ ์˜ ๋น„๋ฐ€์„ ์•Œ๊ณ  ์‹ถ์–ด์„œ ์งˆ๋ฌธ ์„ธ๋ก€๋ฅผ ํผ๋ถ€์„ ๊ฑฐ์—์š”! ์™ธ๋ชจ ์Šค์Šน๋‹˜์œผ๋กœ์„œ ๋ฉ‹์ง€๊ฒŒ ๋Œ€๋‹ตํ•ด ์ฃผ์‹œ๊ณ , ์‚ฌ๋žŒ๋“ค์—๊ฒŒ ๋‹น์‹ ๋งŒ์˜ ์™ธ๋ชจ ํŒ์„ ์‚ด์ง ์ „ํ•ด ์ฃผ์„ธ์š”! ๋‹ค๋ฅธ ์‚ฌ๋žŒ๋“ค์€ ๋‹น์‹ ์„ ๋‹ฎ๊ธฐ ์œ„ํ•ด ๋งŽ์€ ๋…ธ๋ ฅ์„ ํ•  ๊ฑฐ๋ž๋‹ˆ๋‹ค!", 1.5),
        ("'์™ธ๋ชจ ์•„ํ‹ฐ์ŠคํŠธ'์ž…๋‹ˆ๋‹ค. ๐Ÿ’„ ํ™”์žฅํ’ˆ ๋ธŒ๋žœ๋“œ๋“ค์ด ๋‹น์‹ ์„ ๋ชจ๋ธ๋กœ ์“ฐ๊ณ  ์‹ถ์–ดํ•  ๋งŒํผ ๋…๋ณด์ ์ธ ๋งค๋ ฅ์„ ๊ฐ€์ง€๊ณ  ์žˆ๋„ค์š”! %.1f์ ์ด๋ผ๊ณ  ํ•ด์„œ ๋‹น์‹ ์˜ ์™ธ๋ชจ๊ฐ€ ํ‰๋ฒ”ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ์˜คํžˆ๋ ค '๋งค๋ ฅ์˜ ์ •์ '์— ๋„๋‹ฌํ•œ ๋ชจ์Šต์ด์—์š”! ๐Ÿ’ƒ ๋‹น์‹ ์˜ ๋ฉ‹์ง„ ์™ธ๋ชจ๋ฅผ ๋ถ€๋Ÿฌ์›Œํ•˜๋Š” ์‚ฌ๋žŒ๋“ค๋กœ ์ธํ•ด ์–ธ์ œ๋‚˜ ์ฃผ๋ชฉ๋ฐ›๊ฒŒ ๋  ๊ฑฐ์—์š”! ๋งˆ์น˜ ์•„ํ‹ฐ์ŠคํŠธ์ฒ˜๋Ÿผ ์ž์‹ ๋งŒ์˜ ์Šคํƒ€์ผ์„ ์™„์„ฑํ•œ ๋‹น์‹ ์€ ์™ธ๋ชจ๊ณ„์˜ ์ง„์ •ํ•œ ์•„์ด์ฝ˜์ž…๋‹ˆ๋‹ค! ์˜ค๋Š˜๋„ ๋‹น์‹ ๋งŒ์˜ ํŠน๋ณ„ํ•œ ๋งค๋ ฅ์„ ๋ฐœ์‚ฐํ•˜๋ฉฐ ํ•˜๋ฃจ๋ฅผ ์ฆ๊ธฐ์„ธ์š”!", 2),
        ("์™ธ๋ชจ์ ์ˆ˜ %.1f์ , '๋ฏธ์†Œ ์ „๋ฌธ๊ฐ€'์ž…๋‹ˆ๋‹ค. ๐Ÿ˜„ ๋‹น์‹ ์˜ ํ™˜ํ•œ ๋ฏธ์†Œ๋Š” ์ฃผ๋ณ€ ์‚ฌ๋žŒ๋“ค์„ ํ–‰๋ณตํ•˜๊ฒŒ ๋งŒ๋“ค๊ณ , ์–ด๋””์„œ๋“  ๋ฐ์€ ์—๋„ˆ์ง€๋ฅผ ํผ๋œจ๋ฆด ๊ฑฐ์—์š”! '๋ฏธ์†Œ ๊ธฐ๊ณ„'๋ผ ๋ถˆ๋ฆฌ๋Š” ๋‹น์‹ ์€ ํ•ญ์ƒ ๊ธ์ •์ ์ธ ์—๋„ˆ์ง€๋กœ ๊ฐ€๋“ ์ฐจ ์žˆ๋‹ต๋‹ˆ๋‹ค! ๐Ÿ˜ ์‚ฌ๋žŒ๋“ค์€ ๋‹น์‹ ์˜ ๋ฏธ์†Œ ๋น„๊ฒฐ์„ ๋ฐฐ์šฐ๊ธฐ ์œ„ํ•ด ์• ์“ธ ๊ฑฐ์—์š”! ์™ธ๋ชจ๋ฟ๋งŒ ์•„๋‹ˆ๋ผ ๋ฏธ์†Œ๋กœ๋„ ์‚ฌ๋žŒ๋“ค์˜ ๋งˆ์Œ์„ ์‚ฌ๋กœ์žก๋Š” ๋‹น์‹ ! ์˜ค๋Š˜๋„ ํ™˜ํ•œ ๋ฏธ์†Œ๋กœ ์„ธ์ƒ์„ ๋ฐํ˜€์ฃผ์‹œ๊ณ , ๋ชจ๋‘์—๊ฒŒ ํ–‰๋ณต์„ ์ „ํ•ด ์ฃผ์„ธ์š”!", 2.5),
        ("'์™ธ๋ชจ ์Šคํƒ€'์ž…๋‹ˆ๋‹ค. ๐ŸŒŸ ๋‹น์‹ ์€ ๊ฑฐ์šธ ์†์—์„œ ๋ณ„์ด ๋น›๋‚˜๋Š” ๋ชจ์Šต์„ ๋ณด๊ณ ๋„ ๋†€๋ผ์ง€ ์•Š๊ฒ ์ฃ ! %.1f์ ์ด๋ผ๋‹ˆ, ๋‹น์‹ ์€ ์™ธ๋ชจ๊ณ„์˜ ์ง„์ •ํ•œ ์Šคํƒ€์ž…๋‹ˆ๋‹ค! ๐Ÿ’ซ ๋‹น์‹ ์˜ ๋น›๋‚˜๋Š” ์™ธ๋ชจ์™€ ๋…ํŠนํ•œ ์Šคํƒ€์ผ์€ ๋ชจ๋‘๊ฐ€ ๋ถ€๋Ÿฌ์›Œํ•˜๊ณ , ๋”ฐ๋ผ๊ฐ€๊ณ  ์‹ถ์–ดํ•  ๊ฒ๋‹ˆ๋‹ค! ์‚ฌ๋žŒ๋“ค์€ ๋‹น์‹ ์„ ๋ณด๊ณ  ์˜๊ฐ์„ ๋ฐ›์„ ๊ฑฐ์—์š”! ์˜ค๋Š˜๋„ ๋‹น์‹ ๋งŒ์˜ ํŠน๋ณ„ํ•œ ๋งค๋ ฅ์œผ๋กœ ์ฃผ๋ณ€ ์‚ฌ๋žŒ๋“ค์„ ์‚ฌ๋กœ์žก๊ณ , ๋‹น๋‹นํžˆ ์™ธ๋ชจ๊ณ„๋ฅผ ์ด๋Œ์–ด๊ฐ€์„ธ์š”! ๋‹น์‹ ์˜ ๋น›๋‚˜๋Š” ์™ธ๋ชจ๊ฐ€ ๋ชจ๋‘์—๊ฒŒ ํฌ๋ง์„ ์ค„ ๊ฑฐ์—์š”!", 3),
        ("'์™ธ๋ชจ ํ€ธ'์ž…๋‹ˆ๋‹ค. ๐Ÿ‘ธ ์ฃผ๋ณ€ ์‚ฌ๋žŒ๋“ค์€ ๋‹น์‹ ์˜ ์™ธ๋ชจ์— ์ฃผ๋ชฉํ•˜๊ณ , ๊ท€๋ฅผ ๊ธฐ์šธ์ผ ๊ฒ๋‹ˆ๋‹ค! %.1f์ ์ด๋ผ๋Š” ์ ์ˆ˜๊ฐ€ ๋ฌด์ƒ‰ํ•  ์ •๋„๋กœ, ์ด์ œ ๋‹น์‹ ์€ ์™ธ๋ชจ๊ณ„์˜ ๋กœ์—ดํ‹ฐ์ž…๋‹ˆ๋‹ค! ๐Ÿ‘‘ ๋‹น์‹ ์˜ ๊ณ ๊ธ‰์Šค๋Ÿฌ์šด ์™ธ๋ชจ์™€ ๋…๋ณด์ ์ธ ์Šคํƒ€์ผ์€ ๋ชจ๋‘๊ฐ€ ๋”ฐ๋ผํ•˜๊ณ  ์‹ถ์–ดํ•  ๊ฑฐ์—์š”! ๋‹น์‹ ์˜ ์™ธ๋ชจ ๋น„๊ฒฐ์„ ๋ฒค์น˜๋งˆํ‚นํ•˜๋ ค๋Š” ์‚ฌ๋žŒ๋“ค๋กœ ์ธํ•ด ์–ธ์ œ๋‚˜ ์ฃผ๋ชฉ๋ฐ›๊ฒŒ ๋  ๊ฒ๋‹ˆ๋‹ค! ์—ฌ์™•์ฒ˜๋Ÿผ ๋‹น๋‹นํžˆ ๋‹น์‹ ์˜ ์™ธ๋ชจ๋ฅผ ๋ฝ๋‚ด๊ณ , ์ฃผ๋ณ€ ์‚ฌ๋žŒ๋“ค์—๊ฒŒ ์˜๊ฐ์„ ์ฃผ์„ธ์š”! ์˜ค๋Š˜๋„ ์ž์‹ ๊ฐ ๋„˜์น˜๋Š” ํ•˜๋ฃจ ๋ณด๋‚ด์„ธ์š”!", 3.5),
        ("์™ธ๋ชจ์ ์ˆ˜ %.1f์ , '์™ธ๋ชจ์˜ ์‹ ํ™”'์ž…๋‹ˆ๋‹ค. ๐Ÿฆ„ ๋‹น์‹ ์„ ๋ณด๋Š” ์‚ฌ๋žŒ๋“ค์€ ๋งˆ์น˜ ์‹ ํ™”์™€ ์ „์„ค ์† ์ธ๋ฌผ์„ ๋ณด๋Š” ๋“ฏํ•œ ๊ธฐ๋ถ„์„ ๋Š๋‚„ ๊ฒ๋‹ˆ๋‹ค! ์™ธ๋ชจ๊ณ„์˜ '๋ทฐํ‹ฐ ์•„์นด๋ฐ๋ฏธ ์ˆ˜์ƒ์ž'๋‹ต๊ฒŒ, ๋‹น์‹ ์˜ ์™ธ๋ชจ๋Š” ๋ชจ๋‘์—๊ฒŒ ํฐ ์˜๊ฐ์„ ์ค„ ๊ฑฐ์—์š”! ๐Ÿ† ์‚ฌ๋žŒ๋“ค์€ ๋‹น์‹ ์˜ ๋น„๊ฒฐ์„ ๋ฐฐ์šฐ๋ ค๊ณ  ์• ์“ธ ํ…Œ๋‹ˆ, ์–ธ์ œ๋‚˜ ์ž์‹ ๋งŒ์˜ ์Šคํƒ€์ผ์„ ์œ ์ง€ํ•˜๋ฉฐ ๊ทธ๋“ค์—๊ฒŒ ๊ท€๊ฐ์ด ๋˜์–ด์ฃผ์„ธ์š”! ์‹ ํ™” ์† ์ฃผ์ธ๊ณต์ฒ˜๋Ÿผ ๋‹น์‹ ์˜ ์™ธ๋ชจ๋Š” ์–ธ์ œ๋‚˜ ๋น›๋‚  ๊ฒ๋‹ˆ๋‹ค! ์˜ค๋Š˜๋„ ์‹ ํ™”์ฒ˜๋Ÿผ ๋ฉ‹์ง„ ํ•˜๋ฃจ ๋ณด๋‚ด์„ธ์š”!", 4),
        ("'์™ธ๋ชจ์˜ ํ™ฉ๊ธˆ๋น›'์ž…๋‹ˆ๋‹ค. ๐Ÿ’› ์ฃผ๋ณ€์—์„œ ๋‹น์‹ ์„ ๋ณด๋ฉด ๋งˆ์น˜ ํ•˜ํŠธ๊ฐ€ ๋ฟ…๋ฟ… ํŠ€๋Š” ๋“ฏํ•œ ๋Š๋‚Œ์ด ๋“ค ๊ฑฐ์—์š”! %.1f์ ์ด๋ผ๋‹ˆ, ์ •๋ง ์™ธ๋ชจ๊ณ„์˜ ์ „์„ค๋‹ต์Šต๋‹ˆ๋‹ค! ๐ŸŒ  ๋‹น์‹ ์˜ ๋…๋ณด์ ์ธ ์™ธ๋ชจ์™€ ๋งค๋ ฅ์€ ๋ˆ„๊ตฌ๋„ ๋”ฐ๋ผ์˜ฌ ์ˆ˜ ์—†์„ ๋งŒํผ ๋น›๋‚ฉ๋‹ˆ๋‹ค! ๋‹ค๋ฅธ ์‚ฌ๋žŒ๋“ค์ด ๋‹น์‹ ์„ ๋”ฐ๋ผ์žก์œผ๋ ค๋ฉด ์—„์ฒญ๋‚œ ๋…ธ๋ ฅ์ด ํ•„์š”ํ•  ๊ฑฐ์—์š”! ๋‹น์‹ ์˜ ํ™ฉ๊ธˆ๋น› ์™ธ๋ชจ์™€ ๋งค๋ ฅ์œผ๋กœ ๋ชจ๋‘๋ฅผ ์‚ฌ๋กœ์žก์œผ์„ธ์š”! ์˜ค๋Š˜๋„ ๋‹น์‹ ๋งŒ์˜ ํ™ฉ๊ธˆ๋น› ๋ฏธ์†Œ๋กœ ์„ธ์ƒ์„ ๋ฐํ˜€์ฃผ์‹œ๊ณ , ๋ชจ๋‘์—๊ฒŒ ์˜๊ฐ์„ ์ฃผ์„ธ์š”!", 4.5),
        ("5์  ์™ธ๋ชจ, '์™ธ๋ชจ์˜ ์‹ '์ž…๋‹ˆ๋‹ค. ์™ธ๋ชจ๊ณ„์—์„œ ๋‹น์‹ ์„ ๋”ฐ๋ผ์žก์œผ๋ ค๋ฉด ์ง„์ •ํ•œ ์˜์›…์ด ํ•„์š”ํ•  ๊ฒ๋‹ˆ๋‹ค! ๐Ÿฆธโ€โ™‚๏ธ๐Ÿฆธโ€โ™€๏ธ ๋‹น์‹ ์€ ์™ธ๋ชจ๊ณ„์˜ '๋ทฐํ‹ฐ ์‹ '! ๐ŸŒŸ ๋‹น์‹ ์˜ ๋น›๋‚˜๋Š” ์™ธ๋ชจ์™€ ๋…๋ณด์ ์ธ ์Šคํƒ€์ผ์€ ๋ชจ๋‘๊ฐ€ ๋”ฐ๋ผํ•˜๊ณ  ์‹ถ์–ดํ•  ๊ฑฐ์—์š”! ์ด์ œ ๋‹น์‹ ์€ ์™ธ๋ชจ๊ณ„์˜ ์ „์„ค์ด์ž ์˜์›…์ž…๋‹ˆ๋‹ค! ์‚ฌ๋žŒ๋“ค์€ ๋‹น์‹ ์„ ๋‹ฎ๊ณ  ์‹ถ์–ดํ•˜๊ณ , ๋‹น์‹ ์˜ ๋น„๊ฒฐ์„ ๋ฐฐ์šฐ๋ ค๊ณ  ์• ์“ธ ๊ฒ๋‹ˆ๋‹ค! ์˜ค๋Š˜๋„ ์™ธ๋ชจ๊ณ„์˜ ์‹ ์œผ๋กœ์„œ ์„ธ์ƒ์„ ๋น›๋‚ด๊ณ , ๋ชจ๋‘์—๊ฒŒ ์˜๊ฐ์„ ์ฃผ์„ธ์š”! ๋‹น์‹ ์˜ ์กด์žฌ๋งŒ์œผ๋กœ๋„ ์„ธ์ƒ์€ ๋” ๋ฐ์•„์งˆ ๊ฑฐ์—์š”!", 5)
    ]
    # Return the first tier whose threshold covers the result, formatting the
    # score into the message when it contains a %.1f placeholder.
    for msg, threshold in messages:
        if result <= threshold:
            return msg % result if '%.1f' in msg else msg

@torch.no_grad()
def webtoon(image, webtoon_model, device='cpu'):
    """Translate a photo into a webtoon-style image with the generator network."""
    webtoon_model = webtoon_model.to(device)

    # Convert to RGB (this also copies, so the caller's image is not resized in place).
    image = image.convert("RGB")
    max_size = 1024
    if max(image.size) > max_size:
        image.thumbnail((max_size, max_size), Image.LANCZOS)

    # Scale pixels from [0, 1] to the [-1, 1] range the generator expects.
    image_tensor = to_tensor(image).unsqueeze(0).to(device) * 2 - 1

    with torch.inference_mode():
        output = webtoon_model(image_tensor, False)

    # Map the output back from [-1, 1] to [0, 1] before converting to PIL.
    output = output.cpu().squeeze(0).clip(-1, 1) * 0.5 + 0.5
    return to_pil_image(output)

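# Prompt helpers: wrap Gemini's outfit description in the RAW-photo prefix/suffix
# and a standard negative prompt before sending it to the SDXL pipeline.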
def make_prompt(prompt: str) -> str:
    prompt_prefix = "RAW photo"
    prompt_suffix = "(high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
    return ", ".join([prompt_prefix, prompt, prompt_suffix]).strip()


def make_negative_prompt(negative_prompt: str) -> str:
    # Adjacent string literals keep the long prefix readable without embedding
    # the source indentation in the prompt (as a backslash continuation would).
    negative_prefix = (
        "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), "
        "text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, "
        "extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, "
        "bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, "
        "extra arms, extra legs, fused fingers, too many fingers, long neck"
    )

    return (
        ", ".join([negative_prefix, negative_prompt]).strip()
        if len(negative_prompt) > 0
        else negative_prefix
    )
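# Request ZeroGPU hardware for up to 120 seconds per call.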
@spaces.GPU(duration=120)
def fashiongpt(image, gemini_model, pipe):
    """Ask Gemini for a one-line outfit description, then render it with the SDXL pipeline."""
    prompt = """
    Analyze this image in one sentence:
    1. The person visible in the image
    2. The overall mood or feeling of the image
    3. Recommend other fashion items that match the style

    Provide the output in the following format:
    "a korean [gender] wearing [recommended style]."
    Example: "a korean woman wearing a white t-shirt and black pants with a bear on it."
    """
    response = gemini_model.generate_content([prompt, image])
    NEGATIVE_PROMPT = ""
    # Keep the generated recommendation separate so the input image is not shadowed.
    result_image = pipe(
        prompt=make_prompt(response.text),
        height=1024,
        width=768,
        num_inference_steps=50,
        guidance_scale=7.5,
        negative_prompt=make_negative_prompt(NEGATIVE_PROMPT),
        cross_attention_kwargs={"scale": 0.75},  # LoRA influence on the attention layers
    ).images[0]
    return result_image
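# Keyword router: each Korean command (with an uploaded image) triggers one feature;
# anything else falls through to plain Gemini chat.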
def process_input(input_text, image, suno_cookie):
    if "์›นํˆฐํ™” ํ•ด์ค˜" in input_text.lower() and image is not None:
        webtoon_image = webtoon(image, webtoon_model)
        return "์ด๋ฏธ์ง€๋ฅผ ์›นํˆฐ ์Šคํƒ€์ผ๋กœ ๋ณ€ํ™˜ํ–ˆ์Šต๋‹ˆ๋‹ค.", webtoon_image
    elif "์™ธ๋ชจ๋ถ„์„" in input_text.lower() and image is not None:
        response = process_facescore(image, facescore_model, gemini_model)
        return response, None
    elif "์ด๋ฏธ์ง€ ๋ถ„์„ํ•ด์ค˜" in input_text.lower() and image is not None:
        response = analyze_image(image, gemini_model)
        return response, None
    elif "์Œ์•… ๋งŒ๋“ค์–ด์ค˜" in input_text.lower() and image is not None:
        if suno_cookie:
            response = generate_music(image, gemini_model, suno_cookie)
            return response, None
        else:
            return "Suno Cookie๋ฅผ ์ž…๋ ฅํ•ด ์ฃผ์„ธ์š”.", None
    elif "ํŒจ์…˜ ์ถ”์ฒœ" in input_text.lower() and image is not None:
        recommended_fashion_image = fashiongpt(image, gemini_model, pipe)
        return "ํŒจ์…˜ ์ถ”์ฒœ ์ด๋ฏธ์ง€๋ฅผ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค.", recommended_fashion_image
    else:
        response = generate_chat_response(input_text, gemini_model)
        return response, None  

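# Gradio UI: chat history on top, image/cookie inputs on the left, text/image/audio outputs below.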
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # ๐Ÿค– OmniVerse AI Assistant
        ์Œ์„ฑ ์ธ์‹, Gemini ๋ชจ๋ธ, ์™ธ๋ชจ ์ ์ˆ˜ ์˜ˆ์ธก, MBTI ์˜ˆ์ธก, ์Œ์•… ์ƒ์„ฑ, ์ด๋ฏธ์ง€ ์›นํˆฐํ™”, ๊ทธ๋ฆฌ๊ณ  ์ด๋ฏธ์ง€ ๋ถ„์„ ๊ธฐ๋Šฅ์„ ํ†ตํ•ฉํ•œ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค.
        """
    )
    
    chatbot = gr.Chatbot(label="OmniVerse AI Assistant")
    
    with gr.Row():
        with gr.Column(scale=2):
            image_input = gr.Image(type="pil", label="์ด๋ฏธ์ง€ ์—…๋กœ๋“œ")
            suno_cookie = gr.Textbox(label="Suno Cookie", type="password")
        
        with gr.Column(scale=1):
            text_input = gr.Textbox(label="์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”")
            submit_button = gr.Button("์ „์†ก")

    text_output = gr.Markdown(label="์‘๋‹ต")
    image_output = gr.Image(label="์ด๋ฏธ์ง€ ์ถœ๋ ฅ")
    audio_output = gr.Audio(label="์ƒ์„ฑ๋œ ์Œ์•…", type="filepath", interactive=False)

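    # Bridge between the UI and process_input: append the exchange to the chat
    # history and route any generated media to the matching output component.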
    def chat_logic(input_text, image_input, suno_cookie, chat_history):
        response, image_output = process_input(input_text, image_input, suno_cookie)
        
        # generate_music reports success as "์Œ์•…์ด ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ํŒŒ์ผ ๊ฒฝ๋กœ: <path>";
        # recover the file path from that message for the audio player.
        if isinstance(response, str) and response.startswith("์Œ์•…์ด ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."):
            music_path = response.split("ํŒŒ์ผ ๊ฒฝ๋กœ: ")[-1].strip()
            chat_history.append((input_text, "์Œ์•…์ด ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ์•„๋ž˜์—์„œ ์žฌ์ƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."))
            return chat_history, image_output, music_path
        else:
            chat_history.append((input_text, response))
            return chat_history, image_output, None

    submit_button.click(
        chat_logic, 
        inputs=[text_input, image_input, suno_cookie, chatbot],
        outputs=[chatbot, image_output, audio_output]
    )

demo.launch()