openfree committed on
Commit
5039c8f
•
1 Parent(s): ef46da3

Update app.py

Files changed (1)
  1. app.py +1 -1228
app.py CHANGED
@@ -1,1229 +1,2 @@
- import gradio as gr
- from gradio_toggle import Toggle
- import torch
- from huggingface_hub import snapshot_download
- from transformers import pipeline
-
- from xora.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
- from xora.models.transformers.transformer3d import Transformer3DModel
- from xora.models.transformers.symmetric_patchifier import SymmetricPatchifier
- from xora.schedulers.rf import RectifiedFlowScheduler
- from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
- from transformers import T5EncoderModel, T5Tokenizer
- from xora.utils.conditioning_method import ConditioningMethod
- from pathlib import Path
- import safetensors.torch
- import json
- import numpy as np
- import cv2
- from PIL import Image
- import tempfile
  import os
- import gc
- from openai import OpenAI
- import re
- import time
- # Load system prompts
- system_prompt_t2v = """You are a prompt expert for video generation.
- Improve the given prompt to fit the following structure:
- 1. Start with one clear sentence describing the main action
- 2. Describe specific movements and gestures in chronological order
- 3. Describe the appearance of characters/objects in detail
- 4. Include concrete background and environment details
- 5. Specify camera angles and movement
- 6. Describe lighting and colors in detail
- 7. Naturally incorporate changes or sudden events
- Write the whole description as one natural paragraph,
- concretely and visually, as if a cinematographer were describing a shot list.
- Stay under 200 words, but be as detailed as possible."""
-
- system_prompt_i2v = """You are a prompt expert for image-based video generation.
- Improve the given prompt to fit the following structure:
- 1. Start with one clear sentence describing the main action
- 2. Describe specific movements and gestures in chronological order
- 3. Describe the appearance of characters/objects in detail
- 4. Include concrete background and environment details
- 5. Specify camera angles and movement
- 6. Describe lighting and colors in detail
- 7. Naturally incorporate changes or sudden events
- Write the whole description as one natural paragraph,
- concretely and visually, as if a cinematographer were describing a shot list.
- Stay under 200 words, but be as detailed as possible."""
-
- # Load the Hugging Face token if needed
- hf_token = os.getenv("HF_TOKEN")
- openai_api_key = os.getenv("OPENAI_API_KEY")
- client = OpenAI(api_key=openai_api_key)
-
- # Initialize the translation pipeline with device and clean-up settings
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- translator = pipeline(
-     "translation",
-     model="Helsinki-NLP/opus-mt-ko-en",
-     device=device,
-     clean_up_tokenization_spaces=True
- )
-
- # Korean text detection function
- def contains_korean(text):
-     korean_pattern = re.compile('[γ„±-γ…Žγ…-γ…£κ°€-힣]')
-     return bool(korean_pattern.search(text))
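- # Note: the regex character class covers Hangul Jamo (γ„±-γ…Ž, γ…-γ…£) as well as
- # complete Hangul syllables (κ°€-힣), so isolated consonants/vowels are detected too.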
-
- def translate_korean_prompt(prompt, max_length=450):
-     """
-     Translate a Korean prompt to English if Korean text is detected.
-     Split long text into chunks if necessary.
-     """
-     if not contains_korean(prompt):
-         return prompt
-
-     # Split long text into chunks
-     def split_text(text, max_length):
-         words = text.split()
-         chunks = []
-         current_chunk = []
-         current_length = 0
-
-         for word in words:
-             if current_length + len(word) + 1 > max_length:
-                 chunks.append(' '.join(current_chunk))
-                 current_chunk = [word]
-                 current_length = len(word)
-             else:
-                 current_chunk.append(word)
-                 current_length += len(word) + 1
-
-         if current_chunk:
-             chunks.append(' '.join(current_chunk))
-         return chunks
-
-     try:
-         if len(prompt) > max_length:
-             chunks = split_text(prompt, max_length)
-             translated_chunks = []
-
-             for chunk in chunks:
-                 translated = translator(chunk, max_length=512)[0]['translation_text']
-                 translated_chunks.append(translated)
-
-             final_translation = ' '.join(translated_chunks)
-         else:
-             final_translation = translator(prompt, max_length=512)[0]['translation_text']
-
-         print(f"Original Korean prompt: {prompt}")
-         print(f"Translated English prompt: {final_translation}")
-         return final_translation
-
-     except Exception as e:
-         print(f"Translation error: {e}")
-         return prompt  # Return the original prompt if translation fails
-
- def enhance_prompt(prompt, type="t2v"):
-     system_prompt = system_prompt_t2v if type == "t2v" else system_prompt_i2v
-     messages = [
-         {"role": "system", "content": system_prompt},
-         {"role": "user", "content": prompt},
-     ]
-
-     try:
-         response = client.chat.completions.create(
-             model="gpt-4-1106-preview",
-             messages=messages,
-             max_tokens=2000,
-         )
-         enhanced_prompt = response.choices[0].message.content.strip()
-
-         print("\n=== Prompt enhancement result ===")
-         print("Original Prompt:")
-         print(prompt)
-         print("\nEnhanced Prompt:")
-         print(enhanced_prompt)
-         print("========================\n")
-
-         return enhanced_prompt
-     except Exception as e:
-         print(f"Error during prompt enhancement: {e}")
-         return prompt
-
- def update_prompt_t2v(prompt, enhance_toggle):
-     return update_prompt(prompt, enhance_toggle, "t2v")
-
- def update_prompt_i2v(prompt, enhance_toggle):
-     return update_prompt(prompt, enhance_toggle, "i2v")
-
- def update_prompt(prompt, enhance_toggle, type="t2v"):
-     if enhance_toggle:
-         return enhance_prompt(prompt, type)
-     return prompt
-
- # Set the model download directory within Hugging Face Spaces
- model_path = "asset"
- if not os.path.exists(model_path):
-     snapshot_download(
-         "Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token
-     )
-
- # Global paths for model components
- vae_dir = Path(model_path) / "vae"
- unet_dir = Path(model_path) / "unet"
- scheduler_dir = Path(model_path) / "scheduler"
-
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- def load_vae(vae_dir):
-     vae_ckpt_path = vae_dir / "vae_diffusion_pytorch_model.safetensors"
-     vae_config_path = vae_dir / "config.json"
-     with open(vae_config_path, "r") as f:
-         vae_config = json.load(f)
-     vae = CausalVideoAutoencoder.from_config(vae_config)
-     vae_state_dict = safetensors.torch.load_file(vae_ckpt_path)
-     vae.load_state_dict(vae_state_dict)
-     return vae.to(device=device, dtype=torch.bfloat16)
-
- def load_unet(unet_dir):
-     unet_ckpt_path = unet_dir / "unet_diffusion_pytorch_model.safetensors"
-     unet_config_path = unet_dir / "config.json"
-     transformer_config = Transformer3DModel.load_config(unet_config_path)
-     transformer = Transformer3DModel.from_config(transformer_config)
-     unet_state_dict = safetensors.torch.load_file(unet_ckpt_path)
-     transformer.load_state_dict(unet_state_dict, strict=True)
-     return transformer.to(device=device, dtype=torch.bfloat16)
-
- def load_scheduler(scheduler_dir):
-     scheduler_config_path = scheduler_dir / "scheduler_config.json"
-     scheduler_config = RectifiedFlowScheduler.load_config(scheduler_config_path)
-     return RectifiedFlowScheduler.from_config(scheduler_config)
-
- def center_crop_and_resize(frame, target_height, target_width):
-     # If a gr.State object was passed in, unwrap its value
-     if isinstance(target_height, gr.State):
-         target_height = target_height.value
-     if isinstance(target_width, gr.State):
-         target_width = target_width.value
-
-     h, w, _ = frame.shape
-     aspect_ratio_target = target_width / target_height
-     aspect_ratio_frame = w / h
-
-     if aspect_ratio_frame > aspect_ratio_target:
-         new_width = int(h * aspect_ratio_target)
-         x_start = (w - new_width) // 2
-         frame_cropped = frame[:, x_start : x_start + new_width]
-     else:
-         new_height = int(w / aspect_ratio_target)
-         y_start = (h - new_height) // 2
-         frame_cropped = frame[y_start : y_start + new_height, :]
-
-     frame_resized = cv2.resize(frame_cropped, (target_width, target_height))
-     return frame_resized
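- # (center_crop_and_resize trims whichever axis is too long for the target
- # aspect ratio, symmetrically on both sides, before resizing, so the frame's
- # center is preserved.)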
-
-
-
- def load_image_to_tensor_with_resize(image_path, target_height=512, target_width=768):
-     image = Image.open(image_path).convert("RGB")
-     image_np = np.array(image)
-     frame_resized = center_crop_and_resize(image_np, target_height, target_width)
-     frame_tensor = torch.tensor(frame_resized).permute(2, 0, 1).float()
-     frame_tensor = (frame_tensor / 127.5) - 1.0
-     return frame_tensor.unsqueeze(0).unsqueeze(2)
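- # Result shape: (batch=1, channels, frames=1, height, width), with pixel
- # values rescaled from [0, 255] to [-1, 1].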
-
- # Load models
- vae = load_vae(vae_dir)
- unet = load_unet(unet_dir)
- scheduler = load_scheduler(scheduler_dir)
- patchifier = SymmetricPatchifier(patch_size=1)
- text_encoder = T5EncoderModel.from_pretrained(
-     "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder"
- ).to(device)
- tokenizer = T5Tokenizer.from_pretrained(
-     "PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer"
- )
-
- pipeline = XoraVideoPipeline(
-     transformer=unet,
-     patchifier=patchifier,
-     text_encoder=text_encoder,
-     tokenizer=tokenizer,
-     scheduler=scheduler,
-     vae=vae,
- ).to(device)
-
- # Preset options for resolution and frame configuration
- # Frame counts are converted to seconds assuming 25 FPS
- preset_options = [
-     {"label": "[16:9 HD] 1216x704, 1.6s", "width": 1216, "height": 704, "num_frames": 41},
-     {"label": "[16:9] 1088x704, 2.0s", "width": 1088, "height": 704, "num_frames": 49},
-     {"label": "[16:9] 1056x640, 2.3s", "width": 1056, "height": 640, "num_frames": 57},
-     {"label": "[16:9] 896x608, 2.9s", "width": 896, "height": 608, "num_frames": 73},
-     {"label": "[16:9] 800x512, 3.9s", "width": 800, "height": 512, "num_frames": 97},
-     {"label": "[16:9] 736x480, 4.5s", "width": 736, "height": 480, "num_frames": 113},
-     {"label": "[16:9] 704x448, 5.2s", "width": 704, "height": 448, "num_frames": 129},
-     {"label": "[16:9] 608x352, 7.7s", "width": 608, "height": 352, "num_frames": 193},
-     {"label": "[16:9] 576x352, 8.0s", "width": 576, "height": 352, "num_frames": 201},
-     {"label": "[16:9] 544x320, 9.6s", "width": 544, "height": 320, "num_frames": 241},
-     {"label": "[16:9] 512x320, 10.3s", "width": 512, "height": 320, "num_frames": 257},
-     {"label": "[3:2] 704x480, 4.8s", "width": 704, "height": 480, "num_frames": 121},
-     {"label": "[3:2] 512x352, 9.3s", "width": 512, "height": 352, "num_frames": 233},
-     {"label": "[1:1] 704x704, 2.3s", "width": 704, "height": 704, "num_frames": 57},
-     {"label": "[9:16] 608x1088, 2.0s", "width": 608, "height": 1088, "num_frames": 49},
-     {"label": "[9:16] 448x800, 4.2s", "width": 448, "height": 800, "num_frames": 105},
- ]
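- # Each label's duration is num_frames / 25 (e.g. 257 frames / 25 FPS ≈ 10.3 s);
- # note that higher resolutions are paired with fewer frames.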
-
- def preset_changed(preset):
-     selected = next((item for item in preset_options if item["label"] == preset), None)
-     if selected is None:
-         raise gr.Error("Invalid preset selected")
-     return [
-         gr.State(value=selected["height"]),
-         gr.State(value=selected["width"]),
-         gr.State(value=selected["num_frames"]),
-         gr.update(visible=False),
-         gr.update(visible=False),
-         gr.update(visible=False),
-     ]
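- # The generation functions below unwrap these gr.State objects via .value.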
-
- def generate_video_from_text(
-     prompt,
-     enhance_prompt_toggle,
-     negative_prompt,
-     frame_rate,
-     seed,
-     num_inference_steps,
-     guidance_scale,
-     height,
-     width,
-     num_frames,
-     progress=gr.Progress(),
- ):
-     # Unwrap values from gr.State objects
-     height = height.value if isinstance(height, gr.State) else height
-     width = width.value if isinstance(width, gr.State) else width
-     num_frames = num_frames.value if isinstance(num_frames, gr.State) else num_frames
-
-     if len(prompt.strip()) < 50:
-         raise gr.Error(
-             "The prompt must be at least 50 characters long. Please provide a more detailed description.",
-             duration=5,
-         )
-
-     # If prompt enhancement is enabled
-     if enhance_prompt_toggle:
-         prompt = enhance_prompt(prompt, "t2v")
-
-     # Translate Korean prompts to English
-     prompt = translate_korean_prompt(prompt)
-     negative_prompt = translate_korean_prompt(negative_prompt)
-
-     # Set defaults
-     height = height or 320
-     width = width or 512
-     num_frames = num_frames or 257
-     frame_rate = frame_rate or 25
-     seed = seed or 171198
-     num_inference_steps = num_inference_steps or 41
-     guidance_scale = guidance_scale or 4.0
-
-     sample = {
-         "prompt": prompt,
-         "prompt_attention_mask": None,
-         "negative_prompt": negative_prompt,
-         "negative_prompt_attention_mask": None,
-         "media_items": None,
-     }
-
-     generator = torch.Generator(device="cpu").manual_seed(seed)
-
-     def gradio_progress_callback(self, step, timestep, kwargs):
-         progress((step + 1) / num_inference_steps)
-
-     try:
-         with torch.no_grad():
-             images = pipeline(
-                 num_inference_steps=num_inference_steps,
-                 num_images_per_prompt=1,
-                 guidance_scale=guidance_scale,
-                 generator=generator,
-                 output_type="pt",
-                 height=height,
-                 width=width,
-                 num_frames=num_frames,
-                 frame_rate=frame_rate,
-                 **sample,
-                 is_video=True,
-                 vae_per_channel_normalize=True,
-                 conditioning_method=ConditioningMethod.UNCONDITIONAL,
-                 mixed_precision=True,
-                 callback_on_step_end=gradio_progress_callback,
-             ).images
-     except Exception as e:
-         raise gr.Error(
-             f"An error occurred while generating the video. Please try again. Error: {e}",
-             duration=5,
-         )
-     finally:
-         torch.cuda.empty_cache()
-         gc.collect()
-
-     output_path = tempfile.mktemp(suffix=".mp4")
-     video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
-     video_np = (video_np * 255).astype(np.uint8)
-     height, width = video_np.shape[1:3]
-     out = cv2.VideoWriter(
-         output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
-     )
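-     # video_np is RGB; reversing the last axis yields the BGR channel order
-     # that cv2.VideoWriter expects.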
-     for frame in video_np[..., ::-1]:
-         out.write(frame)
-     out.release()
-     del images
-     del video_np
-     torch.cuda.empty_cache()
-     return output_path
-
- def generate_video_from_image(
-     image_path,
-     prompt,
-     enhance_prompt_toggle,
-     negative_prompt,
-     frame_rate,
-     seed,
-     num_inference_steps,
-     guidance_scale,
-     height,
-     width,
-     num_frames,
-     progress=gr.Progress(),
- ):
-     # Unwrap values from gr.State objects
-     height = height.value if isinstance(height, gr.State) else height
-     width = width.value if isinstance(width, gr.State) else width
-     num_frames = num_frames.value if isinstance(num_frames, gr.State) else num_frames
-
-     if not image_path:
-         raise gr.Error("Please provide an input image.", duration=5)
-
-     if len(prompt.strip()) < 50:
-         raise gr.Error(
-             "The prompt must be at least 50 characters long. Please provide a more detailed description.",
-             duration=5,
-         )
-
-     # If prompt enhancement is enabled
-     if enhance_prompt_toggle:
-         prompt = enhance_prompt(prompt, "i2v")
-
-     # Translate Korean prompts to English
-     prompt = translate_korean_prompt(prompt)
-     negative_prompt = translate_korean_prompt(negative_prompt)
-
-     # Set defaults
-     height = height or 320
-     width = width or 512
-     num_frames = num_frames or 257
-     frame_rate = frame_rate or 25
-     seed = seed or 171198
-     num_inference_steps = num_inference_steps or 41
-     guidance_scale = guidance_scale or 4.0
-
-     # Load and preprocess the image
-     media_items = (
-         load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
-     )
-
-     sample = {
-         "prompt": prompt,
-         "prompt_attention_mask": None,
-         "negative_prompt": negative_prompt,
-         "negative_prompt_attention_mask": None,
-         "media_items": media_items,
-     }
-
-     generator = torch.Generator(device="cpu").manual_seed(seed)
-
-     def gradio_progress_callback(self, step, timestep, kwargs):
-         progress((step + 1) / num_inference_steps)
-
-     try:
-         with torch.no_grad():
-             images = pipeline(
-                 num_inference_steps=num_inference_steps,
-                 num_images_per_prompt=1,
-                 guidance_scale=guidance_scale,
-                 generator=generator,
-                 output_type="pt",
-                 height=height,
-                 width=width,
-                 num_frames=num_frames,
-                 frame_rate=frame_rate,
-                 **sample,
-                 is_video=True,
-                 vae_per_channel_normalize=True,
-                 conditioning_method=ConditioningMethod.FIRST_FRAME,
-                 mixed_precision=True,
-                 callback_on_step_end=gradio_progress_callback,
-             ).images
-
-         output_path = tempfile.mktemp(suffix=".mp4")
-         video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
-         video_np = (video_np * 255).astype(np.uint8)
-         height, width = video_np.shape[1:3]
-         out = cv2.VideoWriter(
-             output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (width, height)
-         )
-         for frame in video_np[..., ::-1]:
-             out.write(frame)
-         out.release()
-
-     except Exception as e:
-         raise gr.Error(
-             f"An error occurred while generating the video. Please try again. Error: {e}",
-             duration=5,
-         )
-
-     finally:
-         torch.cuda.empty_cache()
-         gc.collect()
-         if 'images' in locals():
-             del images
-         if 'video_np' in locals():
-             del video_np
-         if 'media_items' in locals():
-             del media_items
-
-     return output_path
-
- def create_advanced_options():
-     with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
-         seed = gr.Slider(
-             label="4.1 Seed",
-             minimum=0,
-             maximum=1000000,
-             step=1,
-             value=171198
-         )
-         inference_steps = gr.Slider(
-             label="4.2 Inference Steps",
-             minimum=1,
-             maximum=50,
-             step=1,
-             value=41,
-             visible=False
-         )
-         guidance_scale = gr.Slider(
-             label="4.3 Guidance Scale",
-             minimum=1.0,
-             maximum=5.0,
-             step=0.1,
-             value=4.0,
-             visible=False
-         )
-         height_slider = gr.Slider(
-             label="4.4 Height",
-             minimum=256,
-             maximum=1024,
-             step=64,
-             value=320,
-             visible=False,
-         )
-         width_slider = gr.Slider(
-             label="4.5 Width",
-             minimum=256,
-             maximum=1024,
-             step=64,
-             value=512,
-             visible=False,
-         )
-         num_frames_slider = gr.Slider(
-             label="4.6 Number of Frames",
-             minimum=1,
-             maximum=257,
-             step=1,
-             value=257,
-             visible=False,
-         )
-
-         return [
-             seed,
-             inference_steps,
-             guidance_scale,
-             height_slider,
-             width_slider,
-             num_frames_slider,
-         ]
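- # Order matters: the event handlers below index into this list as
- # [0]=seed, [1]=inference_steps, [2]=guidance_scale, [3]=height, [4]=width, [5]=num_frames.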
-
- system_prompt_scenario = """You are a prompt expert who writes background-video prompts to match a video script.
- Express the mood and context of the given script as visual backgrounds, and always follow these principles:
-
- 1. Do not depict the product or service directly
- 2. Focus on background footage that conveys the script's emotion and tone
- 3. Make the 5 sections connect naturally, like a single story
- 4. Use abstract and metaphorical visual expression
-
- Prompt-writing guide for each section:
- 1. Background and need: a background scene expressing the overall mood of the topic
- 2. Problem statement: a background whose atmosphere hints at tension or conflict
- 3. Solution: a transition to a hopeful, bright-toned background
- 4. Main body: a stable background that builds credibility
- 5. Conclusion: a dynamic background for an impactful finish
-
- Compose every section so it flows naturally while keeping a consistent style and tone.
-
- When writing each section's prompt, always improve it to fit the following structure:
- 1. Start with one clear sentence describing the main action
- 2. Describe specific movements and gestures in chronological order
- 3. Describe the appearance of characters/objects in detail
- 4. Include concrete background and environment details
- 5. Specify camera angles and movement
- 6. Describe lighting and colors in detail
- 7. Naturally incorporate changes or sudden events
- Write the whole description as one natural paragraph,
- concretely and visually, as if a cinematographer were describing a shot list.
- Stay under 200 words, but be as detailed as possible.
-
- """
587
-
588
-
589
- def analyze_scenario(scenario):
590
- """μ‹œλ‚˜λ¦¬μ˜€λ₯Ό λΆ„μ„ν•˜μ—¬ 각 μ„Ήμ…˜λ³„ λ°°κ²½ μ˜μƒμš© ν”„λ‘¬ν”„νŠΈ 생성"""
591
- try:
592
- # 각 μ„Ήμ…˜λ³„ ν”„λ‘¬ν”„νŠΈ 생성을 μœ„ν•œ λ©”μ‹œμ§€ ꡬ성
593
- section_prompts = []
594
-
595
- for section_num in range(1, 6):
596
- section_descriptions = {
597
- 1: "λ°°κ²½ 및 ν•„μš”μ„±: 주제의 μ „λ°˜μ μΈ λΆ„μœ„κΈ°λ₯Ό ν‘œν˜„ν•˜λŠ” λ°°κ²½ 씬",
598
- 2: "ν₯λ―Έ 유발: κΈ΄μž₯κ°μ΄λ‚˜ κ°ˆλ“±μ„ μ•”μ‹œν•˜λŠ” λΆ„μœ„κΈ° μžˆλŠ” λ°°κ²½",
599
- 3: "ν•΄κ²°μ±… μ œμ‹œ: 희망적이고 밝은 ν†€μ˜ λ°°κ²½ μ „ν™˜",
600
- 4: "λ³Έλ‘ : μ•ˆμ •κ° 있고 신뒰도λ₯Ό λ†’μ΄λŠ” λ°°κ²½",
601
- 5: "κ²°λ‘ : μž„νŒ©νŠΈ μžˆλŠ” 마무리λ₯Ό μœ„ν•œ 역동적인 λ°°κ²½"
602
- }
603
-
604
- messages = [
605
- {"role": "system", "content": system_prompt_scenario},
606
- {"role": "user", "content": f"""
607
- λ‹€μŒ 슀크립트의 {section_num}번째 μ„Ήμ…˜({section_descriptions[section_num]})에 λŒ€ν•œ
608
- λ°°κ²½ μ˜μƒ ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•΄μ£Όμ„Έμš”.
609
-
610
- 슀크립트:
611
- {scenario}
612
-
613
- μ£Όμ˜μ‚¬ν•­:
614
- 1. ν•΄λ‹Ή μ„Ήμ…˜μ˜ νŠΉμ„±({section_descriptions[section_num]})에 λ§žλŠ” λΆ„μœ„κΈ°μ™€ 톀을 λ°˜μ˜ν•˜μ„Έμš”.
615
- 2. 직접적인 μ œν’ˆ/μ„œλΉ„μŠ€ λ¬˜μ‚¬λŠ” ν”Όν•˜κ³ , 감성적이고 μ€μœ μ μΈ λ°°κ²½ μ˜μƒμ— μ§‘μ€‘ν•˜μ„Έμš”.
616
- 3. λ‹€μŒ ꡬ쑰λ₯Ό λ°˜λ“œμ‹œ ν¬ν•¨ν•˜μ„Έμš”:
617
- - μ£Όμš” λ™μž‘μ„ λͺ…ν™•ν•œ ν•œ λ¬Έμž₯으둜 μ‹œμž‘
618
- - ꡬ체적인 λ™μž‘κ³Ό 제슀처λ₯Ό μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ„€λͺ…
619
- - λ°°κ²½κ³Ό ν™˜κ²½ μ„ΈλΆ€ 사항을 ꡬ체적으둜 포함
620
- - 카메라 각도와 μ›€μ§μž„μ„ λͺ…μ‹œ
621
- - μ‘°λͺ…κ³Ό 색상을 μžμ„Ένžˆ μ„€λͺ…
622
- - λ³€ν™”λ‚˜ κ°‘μž‘μŠ€λŸ¬μš΄ 사건을 μžμ—°μŠ€λŸ½κ²Œ 포함"""}
623
- ]
624
-
625
- response = client.chat.completions.create(
626
- model="gpt-4-1106-preview",
627
- messages=messages,
628
- max_tokens=1000,
629
- temperature=0.7
630
- )
631
-
632
- section_prompt = response.choices[0].message.content.strip()
633
- section_prompts.append(f"{section_num}. {section_prompt}")
634
-
635
- # API μš”μ²­ 사이에 짧은 λ”œλ ˆμ΄ μΆ”κ°€
636
- time.sleep(1)
637
-
638
- return section_prompts
639
-
640
- except Exception as e:
641
- print(f"Error during scenario analysis: {e}")
642
- return ["Error occurred during analysis"] * 5
643
-
- def generate_section_video(prompt, preset, section_number=1, base_seed=171198, progress=gr.Progress()):
-     """Generate the video for a single section."""
-     try:
-         if not prompt or len(prompt.strip()) < 50:
-             raise gr.Error("The prompt must be at least 50 characters long.")
-
-         if not preset:
-             raise gr.Error("Please select a resolution preset.")
-
-         selected = next((item for item in preset_options if item["label"] == preset), None)
-         if not selected:
-             raise gr.Error("Invalid preset.")
-
-         section_seed = base_seed + section_number
-
-         return generate_video_from_text(
-             prompt=prompt,
-             enhance_prompt_toggle=False,  # Disable prompt enhancement for section generation
-             negative_prompt="low quality, worst quality, deformed, distorted, warped",
-             frame_rate=25,
-             seed=section_seed,
-             num_inference_steps=41,
-             guidance_scale=4.0,
-             height=selected["height"],
-             width=selected["width"],
-             num_frames=selected["num_frames"],
-             progress=progress
-         )
-     except Exception as e:
-         print(f"Error in section {section_number}: {e}")
-         raise gr.Error(f"Error while generating section {section_number}: {str(e)}")
-     finally:
-         torch.cuda.empty_cache()
-         gc.collect()
-
- def generate_single_section_prompt(scenario, section_number):
-     """Generate the prompt for an individual section."""
-     section_descriptions = {
-         1: "Background and need: a background scene expressing the overall mood of the topic",
-         2: "Hook: a background that sparks interest and builds anticipation",
-         3: "Solution: a transition to a hopeful, bright-toned background",
-         4: "Main body: a stable background that builds credibility",
-         5: "Conclusion: a dynamic background for an impactful finish"
-     }
-
-     messages = [
-         {"role": "system", "content": system_prompt_scenario},
-         {"role": "user", "content": f"""
- Create a background-video prompt for section {section_number}
- ({section_descriptions[section_number]}) of the following script.
-
- Script:
- {scenario}
-
- Guidelines:
- 1. Reflect the mood and tone that fit this section's role ({section_descriptions[section_number]}).
- 2. Avoid direct depictions of the product/service; focus on emotional, metaphorical background footage.
- 3. Always include the following structure:
-    - Start with one clear sentence describing the main action
-    - Describe specific movements and gestures in chronological order
-    - Include concrete background and environment details
-    - Specify camera angles and movement
-    - Describe lighting and colors in detail
-    - Naturally incorporate changes or sudden events"""}
-     ]
-
-     try:
-         response = client.chat.completions.create(
-             model="gpt-4-1106-preview",
-             messages=messages,
-             max_tokens=1000,  # Increased token limit
-             temperature=0.7
-         )
-         generated_prompt = response.choices[0].message.content.strip()
-         return f"{section_number}. {generated_prompt}"
-     except Exception as e:
-         print(f"Error during prompt generation for section {section_number}: {e}")
-         return f"Error occurred during prompt generation for section {section_number}"
-
-
- # Video merging helper
- def combine_videos(video_paths, output_path):
-     """Merge multiple videos into one."""
-     if not all(video_paths):
-         raise gr.Error("Videos must be generated for every section first.")
-
-     try:
-         # Read properties from the first video
-         cap = cv2.VideoCapture(video_paths[0])
-         fps = int(cap.get(cv2.CAP_PROP_FPS))
-         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-         cap.release()
-
-         # Configure the output video
-         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-         out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-         # Append each video in order
-         for video_path in video_paths:
-             if video_path and os.path.exists(video_path):
-                 cap = cv2.VideoCapture(video_path)
-                 while True:
-                     ret, frame = cap.read()
-                     if not ret:
-                         break
-                     out.write(frame)
-                 cap.release()
-
-         out.release()
-         return output_path
-     except Exception as e:
-         raise gr.Error(f"Error while merging videos: {e}")
-
- def merge_section_videos(section1, section2, section3, section4, section5):
-     """Merge the section videos into one."""
-     videos = []
-
-     # Check and validate each section's video
-     for i, video_path in enumerate([section1, section2, section3, section4, section5], 1):
-         if video_path:
-             if os.path.exists(video_path):
-                 try:
-                     # Validate the video file
-                     cap = cv2.VideoCapture(video_path)
-                     if cap.isOpened():
-                         videos.append(video_path)
-                         cap.release()
-                     else:
-                         raise gr.Error(f"The video file for section {i} is corrupted or unreadable.")
-                 except Exception as e:
-                     raise gr.Error(f"Error while processing the video for section {i}: {str(e)}")
-             else:
-                 raise gr.Error(f"The video file for section {i} could not be found.")
-         else:
-             raise gr.Error(f"There is no video for section {i}.")
-
-     if not videos:
-         raise gr.Error("There are no videos to merge.")
-
-     try:
-         output_path = tempfile.mktemp(suffix=".mp4")
-
-         # Read properties from the first video
-         cap = cv2.VideoCapture(videos[0])
-         fps = int(cap.get(cv2.CAP_PROP_FPS))
-         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-         cap.release()
-
-         # Configure the output video
-         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-         out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-         # Append each video in order
-         for video_path in videos:
-             cap = cv2.VideoCapture(video_path)
-             while True:
-                 ret, frame = cap.read()
-                 if not ret:
-                     break
-                 # Resize frames that do not match the output size
-                 if frame.shape[:2] != (height, width):
-                     frame = cv2.resize(frame, (width, height))
-                 out.write(frame)
-             cap.release()
-
-         out.release()
-         print(f"Successfully merged {len(videos)} videos")
-         return output_path
-
-     except Exception as e:
-         raise gr.Error(f"Error while merging videos: {e}")
-
- def generate_script(topic):
-     """Generate a script for the given topic."""
-     if not topic:
-         return "Please enter a topic."
-
-     messages = [
-         {"role": "system", "content": """You are a video-script writing expert.
- For the given topic, write a script with 5 sections following this structure:
-
- 1. Background and need: introduce the topic and hook the viewer
- 2. Hook: develop the content concretely and spark curiosity
- 3. Solution: present the key content and the solution
- 4. Main body: explain in detail and highlight the strengths
- 5. Conclusion: reinforce the key message and prompt action
-
- Each section must connect naturally to the next,
- keeping a consistent tone and mood throughout
- while holding the viewer's attention to the very end."""},
-         {"role": "user", "content": f"Write a video script on the following topic: {topic}"}
-     ]
-
-     try:
-         response = client.chat.completions.create(
-             model="gpt-4-1106-preview",
-             messages=messages,
-             max_tokens=2000,
-             temperature=0.7
-         )
-         return response.choices[0].message.content.strip()
-     except Exception as e:
-         print(f"Error during script generation: {e}")
-         return "An error occurred during script generation."
-
-
- def cleanup():
-     """Free GPU memory and run garbage collection."""
-     torch.cuda.empty_cache()
-     gc.collect()
-
- with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
-     # Initialize State variables
-     txt2vid_current_height = gr.State(value=320)
-     txt2vid_current_width = gr.State(value=512)
-     txt2vid_current_num_frames = gr.State(value=257)
-
-     img2vid_current_height = gr.State(value=320)
-     img2vid_current_width = gr.State(value=512)
-     img2vid_current_num_frames = gr.State(value=257)
-
-     with gr.Tabs():
-         # Text to Video Tab
-         with gr.TabItem("Text to Video"):
-             with gr.Row():
-                 with gr.Column():
-                     txt2vid_prompt = gr.Textbox(
-                         label="Step 1: Enter Prompt",
-                         placeholder="Describe the video you want to generate (at least 50 characters)...",
-                         value="A cute cat",
-                         lines=5,
-                     )
-                     txt2vid_enhance_toggle = Toggle(
-                         label="Prompt Enhancement",
-                         value=False,
-                         interactive=True,
-                     )
-                     txt2vid_negative_prompt = gr.Textbox(
-                         label="Step 2: Enter Negative Prompt",
-                         placeholder="Describe elements you do not want in the video...",
-                         value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
-                         lines=2,
-                         visible=False
-                     )
-                     txt2vid_preset = gr.Dropdown(
-                         choices=[p["label"] for p in preset_options],
-                         value="[16:9] 512x320, 10.3s",
-                         label="Step 2: Select Resolution Preset",
-                     )
-                     txt2vid_frame_rate = gr.Slider(
-                         label="Step 3: Frame Rate",
-                         minimum=21,
-                         maximum=30,
-                         step=1,
-                         value=25,
-                         visible=False
-                     )
-                     txt2vid_advanced = create_advanced_options()
-                     txt2vid_generate = gr.Button(
-                         "Step 3: Generate Video",
-                         variant="primary",
-                         size="lg",
-                     )
-                 with gr.Column():
-                     txt2vid_output = gr.Video(label="Generated Video")
-
-         # Image to Video Tab
-         with gr.TabItem("Image to Video"):
-             with gr.Row():
-                 with gr.Column():
-                     img2vid_image = gr.Image(
-                         type="filepath",
-                         label="Step 1: Upload Input Image",
-                         elem_id="image_upload",
-                     )
-                     img2vid_prompt = gr.Textbox(
-                         label="Step 2: Enter Prompt",
-                         placeholder="Describe how to animate the image (at least 50 characters)...",
-                         value="A cute cat",
-                         lines=5,
-                     )
-                     img2vid_enhance_toggle = Toggle(
-                         label="Prompt Enhancement",
-                         value=False,
-                         interactive=True,
-                     )
-                     img2vid_negative_prompt = gr.Textbox(
-                         label="Step 3: Enter Negative Prompt",
-                         placeholder="Describe elements you do not want in the video...",
-                         value="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
-                         lines=2,
-                         visible=False
-                     )
-                     img2vid_preset = gr.Dropdown(
-                         choices=[p["label"] for p in preset_options],
-                         value="[16:9] 512x320, 10.3s",
-                         label="Step 3: Select Resolution Preset",
-                     )
-                     img2vid_frame_rate = gr.Slider(
-                         label="Step 4: Frame Rate",
-                         minimum=21,
-                         maximum=30,
-                         step=1,
-                         value=25,
-                         visible=False
-                     )
-                     img2vid_advanced = create_advanced_options()
-                     img2vid_generate = gr.Button(
-                         "Step 4: Generate Video",
-                         variant="primary",
-                         size="lg",
-                     )
-                 with gr.Column():
-                     img2vid_output = gr.Video(label="Generated Video")
-
-         # Scenario Tab
-         with gr.TabItem("Scenario to Video (Short-Form)"):
-             with gr.Row():
-                 with gr.Column(scale=1):
-                     script_topic = gr.Textbox(
-                         label="Script Generation",
-                         placeholder="e.g. Generate an upbeat script about a winter hot-spring trip in Japan",
-                         lines=2
-                     )
-                     generate_script_btn = gr.Button("Generate Script", variant="primary")
-
-                     scenario_input = gr.Textbox(
-                         label="Enter Video Script",
-                         placeholder="Enter the full scenario...",
-                         lines=10
-                     )
-                     scenario_preset = gr.Dropdown(
-                         choices=[p["label"] for p in preset_options],
-                         value="[16:9] 512x320, 10.3s",
-                         label="Select Frame Size"
-                     )
-                     analyze_btn = gr.Button("Analyze Scenario and Generate Prompts", variant="primary")
-
-                 with gr.Column(scale=2):
-                     with gr.Row():
-                         # Section 1
-                         with gr.Column():
-                             section1_prompt = gr.Textbox(
-                                 label="1. Background & Need",
-                                 lines=4
-                             )
-                             with gr.Row():
-                                 section1_regenerate = gr.Button("🔄 Generate Prompt")
-                                 section1_generate = gr.Button("🔄 Generate Video")
-                             section1_video = gr.Video(label="Section 1 Video")
-
-                         # Section 2
-                         with gr.Column():
-                             section2_prompt = gr.Textbox(
-                                 label="2. Hook",
-                                 lines=4
-                             )
-                             with gr.Row():
-                                 section2_regenerate = gr.Button("🔄 Generate Prompt")
-                                 section2_generate = gr.Button("🔄 Generate Video")
-                             section2_video = gr.Video(label="Section 2 Video")
-
-                     with gr.Row():
-                         # Section 3
-                         with gr.Column():
-                             section3_prompt = gr.Textbox(
-                                 label="3. Solution",
-                                 lines=4
-                             )
-                             with gr.Row():
-                                 section3_regenerate = gr.Button("🔄 Generate Prompt")
-                                 section3_generate = gr.Button("🔄 Generate Video")
-                             section3_video = gr.Video(label="Section 3 Video")
-
-                         # Section 4
-                         with gr.Column():
-                             section4_prompt = gr.Textbox(
-                                 label="4. Main Body",
-                                 lines=4
-                             )
-                             with gr.Row():
-                                 section4_regenerate = gr.Button("🔄 Generate Prompt")
-                                 section4_generate = gr.Button("🔄 Generate Video")
-                             section4_video = gr.Video(label="Section 4 Video")
-
-                     with gr.Row():
-                         # Section 5
-                         with gr.Column():
-                             section5_prompt = gr.Textbox(
-                                 label="5. Conclusion & Emphasis",
-                                 lines=4
-                             )
-                             with gr.Row():
-                                 section5_regenerate = gr.Button("🔄 Generate Prompt")
-                                 section5_generate = gr.Button("🔄 Generate Video")
-                             section5_video = gr.Video(label="Section 5 Video")
-
-             # Merged video section
-             with gr.Row():
-                 with gr.Column(scale=1):
-                     merge_videos_btn = gr.Button("Generate Merged Video", variant="primary", size="lg")
-
-                 with gr.Column(scale=2):
-                     with gr.Row():
-                         merged_video_output = gr.Video(label="Merged Video")
-
-     # Text to Video Tab handlers
-     txt2vid_preset.change(
-         fn=preset_changed,
-         inputs=[txt2vid_preset],
-         outputs=[
-             txt2vid_current_height,
-             txt2vid_current_width,
-             txt2vid_current_num_frames,
-             txt2vid_advanced[3],  # height_slider
-             txt2vid_advanced[4],  # width_slider
-             txt2vid_advanced[5],  # num_frames_slider
-         ]
-     )
-
-     txt2vid_enhance_toggle.change(
-         fn=update_prompt_t2v,
-         inputs=[txt2vid_prompt, txt2vid_enhance_toggle],
-         outputs=txt2vid_prompt
-     )
-
-     txt2vid_generate.click(
-         fn=generate_video_from_text,
-         inputs=[
-             txt2vid_prompt,
-             txt2vid_enhance_toggle,
-             txt2vid_negative_prompt,
-             txt2vid_frame_rate,
-             txt2vid_advanced[0],  # seed
-             txt2vid_advanced[1],  # inference_steps
-             txt2vid_advanced[2],  # guidance_scale
-             txt2vid_current_height,
-             txt2vid_current_width,
-             txt2vid_current_num_frames,
-         ],
-         outputs=txt2vid_output,
-     )
-
-     # Image to Video Tab handlers
-     img2vid_preset.change(
-         fn=preset_changed,
-         inputs=[img2vid_preset],
-         outputs=[
-             img2vid_current_height,
-             img2vid_current_width,
-             img2vid_current_num_frames,
-             img2vid_advanced[3],  # height_slider
-             img2vid_advanced[4],  # width_slider
-             img2vid_advanced[5],  # num_frames_slider
-         ]
-     )
-
-     img2vid_enhance_toggle.change(
-         fn=update_prompt_i2v,
-         inputs=[img2vid_prompt, img2vid_enhance_toggle],
-         outputs=img2vid_prompt
-     )
-
-     img2vid_generate.click(
-         fn=generate_video_from_image,
-         inputs=[
-             img2vid_image,
-             img2vid_prompt,
-             img2vid_enhance_toggle,
-             img2vid_negative_prompt,
-             img2vid_frame_rate,
-             img2vid_advanced[0],  # seed
-             img2vid_advanced[1],  # inference_steps
-             img2vid_advanced[2],  # guidance_scale
-             img2vid_current_height,
-             img2vid_current_width,
-             img2vid_current_num_frames,
-         ],
-         outputs=img2vid_output,
-     )
-
-     # Scenario Tab handlers
-     generate_script_btn.click(
-         fn=generate_script,
-         inputs=[script_topic],
-         outputs=[scenario_input]
-     )
-
-     analyze_btn.click(
-         fn=analyze_scenario,
-         inputs=[scenario_input],
-         outputs=[
-             section1_prompt, section2_prompt, section3_prompt,
-             section4_prompt, section5_prompt
-         ]
-     )
-
-     # Per-section prompt regeneration handlers
-     section1_regenerate.click(
-         fn=lambda x: generate_single_section_prompt(x, 1),
-         inputs=[scenario_input],
-         outputs=section1_prompt
-     )
-
-     section2_regenerate.click(
-         fn=lambda x: generate_single_section_prompt(x, 2),
-         inputs=[scenario_input],
-         outputs=section2_prompt
-     )
-
-     section3_regenerate.click(
-         fn=lambda x: generate_single_section_prompt(x, 3),
-         inputs=[scenario_input],
-         outputs=section3_prompt
-     )
-
-     section4_regenerate.click(
-         fn=lambda x: generate_single_section_prompt(x, 4),
-         inputs=[scenario_input],
-         outputs=section4_prompt
-     )
-
-     section5_regenerate.click(
-         fn=lambda x: generate_single_section_prompt(x, 5),
-         inputs=[scenario_input],
-         outputs=section5_prompt
-     )
-
-     # Per-section video generation handlers
-     section1_generate.click(
-         fn=lambda p, pr: generate_section_video(p, pr, 1),
-         inputs=[section1_prompt, scenario_preset],
-         outputs=section1_video
-     )
-
-     section2_generate.click(
-         fn=lambda p, pr: generate_section_video(p, pr, 2),
-         inputs=[section2_prompt, scenario_preset],
-         outputs=section2_video
-     )
-
-     section3_generate.click(
-         fn=lambda p, pr: generate_section_video(p, pr, 3),
-         inputs=[section3_prompt, scenario_preset],
-         outputs=section3_video
-     )
-
-     section4_generate.click(
-         fn=lambda p, pr: generate_section_video(p, pr, 4),
-         inputs=[section4_prompt, scenario_preset],
-         outputs=section4_video
-     )
-
-     section5_generate.click(
-         fn=lambda p, pr: generate_section_video(p, pr, 5),
-         inputs=[section5_prompt, scenario_preset],
-         outputs=section5_video
-     )
-
-     # Merged video generation handler
-     merge_videos_btn.click(
-         fn=merge_section_videos,
-         inputs=[
-             section1_video,
-             section2_video,
-             section3_video,
-             section4_video,
-             section5_video
-         ],
-         outputs=merged_video_output
-     )
-
- if __name__ == "__main__":
-     iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
-         share=True,
-         show_api=False
-     )
  import os
+ exec(os.environ.get('APP'))
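+ # Note: this assumes the APP environment variable holds the application's
+ # full source code; if APP is unset, os.environ.get returns None and
+ # exec(None) raises a TypeError at startup.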