tianleliphoebe committed
Commit 26dad4e
1 Parent(s): 3efdac8

update video generation

model/model_manager.py CHANGED
@@ -37,7 +37,7 @@ class ModelManager:
         results = []
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future_to_result = {executor.submit(self.generate_image_ig, prompt, model): model for model in model_names}
-            for future in concurrent.futures.as_completed(future_to_result):
+            for future in future_to_result:
                 result = future.result()
                 results.append(result)
         return results[0], results[1], model_names[0], model_names[1]
@@ -47,7 +47,7 @@ class ModelManager:
         model_names = [model_A, model_B]
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future_to_result = {executor.submit(self.generate_image_ig, prompt, model): model for model in model_names}
-            for future in concurrent.futures.as_completed(future_to_result):
+            for future in future_to_result:
                 result = future.result()
                 results.append(result)
         return results[0], results[1]
@@ -63,7 +63,7 @@ class ModelManager:
         model_names = [model_A, model_B]
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future_to_result = {executor.submit(self.generate_image_ie, textbox_source, textbox_target, textbox_instruct, source_image, model): model for model in model_names}
-            for future in concurrent.futures.as_completed(future_to_result):
+            for future in future_to_result:
                 result = future.result()
                 results.append(result)
         return results[0], results[1]
@@ -77,7 +77,7 @@ class ModelManager:
         # model_names = [model_A, model_B]
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future_to_result = {executor.submit(self.generate_image_ie, textbox_source, textbox_target, textbox_instruct, source_image, model): model for model in model_names}
-            for future in concurrent.futures.as_completed(future_to_result):
+            for future in future_to_result:
                 result = future.result()
                 results.append(result)
         return results[0], results[1], model_names[0], model_names[1]
@@ -97,7 +97,7 @@ class ModelManager:
         results = []
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future_to_result = {executor.submit(self.generate_video_vg, prompt, model): model for model in model_names}
-            for future in concurrent.futures.as_completed(future_to_result):
+            for future in future_to_result:
                 result = future.result()
                 results.append(result)
         return results[0], results[1], model_names[0], model_names[1]
@@ -107,7 +107,7 @@ class ModelManager:
         model_names = [model_A, model_B]
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future_to_result = {executor.submit(self.generate_video_vg, prompt, model): model for model in model_names}
-            for future in concurrent.futures.as_completed(future_to_result):
+            for future in future_to_result:
                 result = future.result()
                 results.append(result)
         return results[0], results[1]
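
Note on the change above: concurrent.futures.as_completed(future_to_result) yields futures in completion order, while iterating the dict directly walks them in submission order, so after the change results[0] and results[1] stay aligned with model_names[0] and model_names[1]. A minimal, self-contained illustration (the task names and delays are made up):

    import concurrent.futures
    import time

    def slow_task(name, delay):
        time.sleep(delay)
        return name

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {executor.submit(slow_task, n, d): n for n, d in [("A", 0.2), ("B", 0.1)]}
        # as_completed: completion order, likely ["B", "A"] here
        by_completion = [f.result() for f in concurrent.futures.as_completed(futures)]
        # iterating the dict: submission order, always ["A", "B"]
        by_submission = [f.result() for f in futures]

    print(by_completion, by_submission)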
model/model_registry.py CHANGED
@@ -166,18 +166,39 @@ register_model_info(
 )
 
 register_model_info(
-    ["fal_fast-animatediff/text-to-video_text2video"],
+    ["fal_AnimateDiff_text2video"],
     "AnimateDiff",
     "https://fal.ai/models/fast-animatediff-t2v",
     "AnimateDiff is a text-driven model that produces diverse and personalized animated images.",
 )
 
 register_model_info(
-    ["fal_fast-animatediff/turbo/text-to-video_text2video"],
+    ["fal_AnimateDiffTurbo_text2video"],
     "AnimateDiff Turbo",
     "https://fal.ai/models/fast-animatediff-t2v-turbo",
     "AnimateDiff Turbo is a lightning version of AnimateDiff.",
 )
+
+register_model_info(
+    ["videogenhub_LaVie_generation"],
+    "LaVie",
+    "https://github.com/Vchitect/LaVie",
+    "LaVie is a video generation model with cascaded latent diffusion models.",
+)
+
+register_model_info(
+    ["videogenhub_VideoCrafter2_generation"],
+    "VideoCrafter2",
+    "https://ailab-cvc.github.io/videocrafter2/",
+    "VideoCrafter2 is a T2V model that disentangles motion from appearance.",
+)
+
+register_model_info(
+    ["videogenhub_ModelScope_generation"],
+    "ModelScope",
+    "https://arxiv.org/abs/2308.06571",
+    "ModelScope is a T2V synthesis model that evolves from a T2I synthesis model.",
+)
 
 
 models = ['imagenhub_LCM_generation','imagenhub_SDXLTurbo_generation','imagenhub_SDXL_generation',
@@ -185,4 +206,5 @@ models = ['imagenhub_LCM_generation','imagenhub_SDXLTurbo_generation','imagenhub_SDXL_generation',
           'imagenhub_StableCascade_generation','imagenhub_PlaygroundV2_generation', 'fal_Playground-v25_generation', 'fal_stable-cascade_text2image',
           'imagenhub_CycleDiffusion_edition', 'imagenhub_Pix2PixZero_edition', 'imagenhub_Prompt2prompt_edition',
           'imagenhub_SDEdit_edition', 'imagenhub_InstructPix2Pix_edition', 'imagenhub_MagicBrush_edition', 'imagenhub_PNP_edition',
-          "fal_fast-animatediff/turbo/text-to-video_text2video", "fal_fast-animatediff/text-to-video_text2video"]
+          "fal_AnimateDiffTurbo_text2video", "fal_AnimateDiff_text2video",
+          "videogenhub_LaVie_generation", "videogenhub_VideoCrafter2_generation", "videogenhub_ModelScope_generation"]
model/models/__init__.py CHANGED
@@ -1,14 +1,17 @@
 from .imagenhub_models import load_imagenhub_model
 from .playground_api import load_playground_model
 from .fal_api_models import load_fal_model
+from .videogenhub_models import load_videogenhub_model
 
 IMAGE_GENERATION_MODELS = ['imagenhub_LCM_generation','imagenhub_SDXLTurbo_generation','imagenhub_SDXL_generation', 'imagenhub_PixArtAlpha_generation',
                            'imagenhub_OpenJourney_generation','imagenhub_SDXLLightning_generation', 'imagenhub_StableCascade_generation',
                            'playground_PlayGroundV2_generation', 'playground_PlayGroundV2.5_generation']
 IMAGE_EDITION_MODELS = ['imagenhub_CycleDiffusion_edition', 'imagenhub_Pix2PixZero_edition', 'imagenhub_Prompt2prompt_edition',
                         'imagenhub_SDEdit_edition', 'imagenhub_InstructPix2Pix_edition', 'imagenhub_MagicBrush_edition', 'imagenhub_PNP_edition']
-VIDEO_GENERATION_MODELS = ['fal_fast-animatediff/text-to-video_text2video',
-                           'fal_fast-animatediff/turbo/text-to-video_text2video']
+VIDEO_GENERATION_MODELS = ['fal_AnimateDiff_text2video',
+                           'fal_AnimateDiffTurbo_text2video',
+                           'videogenhub_LaVie_generation', 'videogenhub_VideoCrafter2_generation',
+                           'videogenhub_ModelScope_generation']
 
 
 def load_pipeline(model_name):
@@ -27,6 +30,8 @@ def load_pipeline(model_name):
         pipe = load_playground_model(model_name)
     elif model_source == "fal":
         pipe = load_fal_model(model_name, model_type)
+    elif model_source == "videogenhub":
+        pipe = load_videogenhub_model(model_name)
     else:
         raise ValueError(f"Model source {model_source} not supported")
     return pipe
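
Note: load_pipeline appears to dispatch on a source prefix parsed out of the registered identifier, which is why every new entry follows the '<source>_<name>_<type>' convention (e.g. 'videogenhub_LaVie_generation'). A sketch of that parsing, assuming the underscore-delimited convention holds (the actual parsing is not shown in this hunk):

    def split_model_identifier(identifier):
        # Assumed convention: "<source>_<name>_<type>", e.g. "videogenhub_LaVie_generation".
        model_source, rest = identifier.split("_", 1)
        model_name, model_type = rest.rsplit("_", 1)
        return model_source, model_name, model_type

    print(split_model_identifier("fal_AnimateDiff_text2video"))    # ('fal', 'AnimateDiff', 'text2video')
    print(split_model_identifier("videogenhub_LaVie_generation"))  # ('videogenhub', 'LaVie', 'generation')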
model/models/fal_api_models.py CHANGED
@@ -51,8 +51,14 @@ class FalModel():
         # return result
         elif self.model_type == "text2video":
             assert "prompt" in kwargs, "prompt is required for text2video model"
+            if self.model_name == 'AnimateDiff':
+                fal_model_name = 'fast-animatediff/text-to-video'
+            elif self.model_name == 'AnimateDiffTurbo':
+                fal_model_name = 'fast-animatediff/turbo/text-to-video'
+            else:
+                raise NotImplementedError(f"text2video model of {self.model_name} in fal is not implemented yet")
             handler = fal_client.submit(
-                f"fal-ai/{self.model_name}",
+                f"fal-ai/{fal_model_name}",
                 arguments={
                     "prompt": kwargs["prompt"]
                 },
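
Note: the new branch maps the short internal names to fal.ai endpoint paths before calling fal_client.submit. The same mapping could be kept in a dictionary; a sketch, assuming only the two endpoints from the diff are supported (the helper name is illustrative):

    FAL_TEXT2VIDEO_ENDPOINTS = {
        "AnimateDiff": "fast-animatediff/text-to-video",
        "AnimateDiffTurbo": "fast-animatediff/turbo/text-to-video",
    }

    def resolve_fal_endpoint(model_name):
        # Returns the full fal.ai application id passed to fal_client.submit.
        try:
            return f"fal-ai/{FAL_TEXT2VIDEO_ENDPOINTS[model_name]}"
        except KeyError:
            raise NotImplementedError(f"text2video model of {model_name} in fal is not implemented yet")

    print(resolve_fal_endpoint("AnimateDiff"))  # fal-ai/fast-animatediff/text-to-video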
model/models/videogenhub_models.py ADDED
@@ -0,0 +1,12 @@
+import videogen_hub
+
+
+class VideogenHubModel():
+    def __init__(self, model_name):
+        self.model = videogen_hub.load(model_name)
+
+    def __call__(self, *args, **kwargs):
+        return self.model.infer_one_video(*args, **kwargs)
+
+def load_videogenhub_model(model_name):
+    return VideogenHubModel(model_name)
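
Note: a minimal usage sketch of the new wrapper, assuming videogen_hub.load and infer_one_video behave as they are called above; the model name and prompt are illustrative and the return value is assumed to be a frame array:

    from model.models.videogenhub_models import load_videogenhub_model

    # Illustrative only: "LaVie" is assumed to be a name videogen_hub.load accepts,
    # and the prompt keyword is forwarded unchanged through __call__.
    pipe = load_videogenhub_model("LaVie")
    video = pipe(prompt="a corgi running on the beach")
    print(getattr(video, "shape", type(video)))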
requirements.txt CHANGED
@@ -5,7 +5,7 @@ faiss-cpu
 fire
 h5py
 xformers~=0.0.20
-numpy>=1.24.0
+numpy>=1.23.5
 pandas<2.0.0
 peft
 torch
@@ -49,4 +49,12 @@ statsmodels
 plotly
 -e git+https://github.com/TIGER-AI-Lab/ImagenHub.git#egg=imagen-hub
 fal_client
-
+-e git+https://github.com/TIGER-AI-Lab/VideoGenHub.git#egg=videogen-hub
+open_clip_torch
+decord
+huggingface_hub
+open-clip-torch-any-py3
+modelscope
+protobuf==3.20.*
+rotary_embedding_torch
+av
serve/vote_utils.py CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path
 from .utils import *
 from .log_utils import build_logger
 from .constants import IMAGE_DIR, VIDEO_DIR
+import imageio
 
 ig_logger = build_logger("gradio_web_server_image_generation", "gr_web_image_generation.log") # ig = image generation, loggers for single model direct chat
 igm_logger = build_logger("gradio_web_server_image_generation_multi", "gr_web_image_generation_multi.log") # igm = image generation multi, loggers for side-by-side and battle
@@ -105,9 +106,14 @@ def vote_last_response_vg(state, vote_type, model_selector, request: gr.Request):
 
     output_file = f'{VIDEO_DIR}/generation/{state.conv_id}.mp4'
     os.makedirs(os.path.dirname(output_file), exist_ok=True)
-    r = requests.get(state.output)
-    with open(output_file, 'wb') as outfile:
-        outfile.write(r.content)
+    if state.model_name.startswith('fal'):
+        r = requests.get(state.output)
+        with open(output_file, 'wb') as outfile:
+            outfile.write(r.content)
+    else:
+        print("======== video shape: ========")
+        print(state.output.shape)
+        imageio.mimwrite(output_file, state.output, fps=8, quality=9)
     save_video_file_on_log_server(output_file)
 
@@ -126,9 +132,14 @@ def vote_last_response_vgm(states, vote_type, model_selectors, request: gr.Request):
     for state in states:
         output_file = f'{VIDEO_DIR}/generation/{state.conv_id}.mp4'
         os.makedirs(os.path.dirname(output_file), exist_ok=True)
-        r = requests.get(state.output)
-        with open(output_file, 'wb') as outfile:
-            outfile.write(r.content)
+        if state.model_name.startswith('fal'):
+            r = requests.get(state.output)
+            with open(output_file, 'wb') as outfile:
+                outfile.write(r.content)
+        else:
+            print("======== video shape: ========")
+            print(state.output.shape)
+            imageio.mimwrite(output_file, state.output, fps=8, quality=9)
         save_video_file_on_log_server(output_file)
 
@@ -799,7 +810,7 @@ def generate_vg(gen_func, state, text, model_name, request: gr.Request):
     state.output = generated_video
     state.model_name = model_name
 
-    yield state, generated_video
+    # yield state, generated_video
 
     finish_tstamp = time.time()
 
@@ -819,10 +830,17 @@ def generate_vg(gen_func, state, text, model_name, request: gr.Request):
 
     output_file = f'{VIDEO_DIR}/generation/{state.conv_id}.mp4'
     os.makedirs(os.path.dirname(output_file), exist_ok=True)
-    r = requests.get(state.output)
-    with open(output_file, 'wb') as outfile:
-        outfile.write(r.content)
+    if model_name.startswith('fal'):
+        r = requests.get(state.output)
+        with open(output_file, 'wb') as outfile:
+            outfile.write(r.content)
+    else:
+        print("======== video shape: ========")
+        print(state.output.shape)
+        imageio.mimwrite(output_file, state.output, fps=8, quality=9)
+
     save_video_file_on_log_server(output_file)
+    yield state, output_file
 
 def generate_vgm(gen_func, state0, state1, text, model_name0, model_name1, request: gr.Request):
     if not text:
@@ -848,11 +866,13 @@ def generate_vgm(gen_func, state0, state1, text, model_name0, model_name1, request: gr.Request):
     state1.output = generated_video1
     state0.model_name = model_name0
     state1.model_name = model_name1
+    print("====== model name =========")
+    print(state0.model_name)
+    print(state1.model_name)
 
-    yield state0, state1, generated_video0, generated_video1
 
     finish_tstamp = time.time()
-    # logger.info(f"===output===: {output}")
+
 
     with open(get_conv_log_filename(), "a") as fout:
         data = {
@@ -883,10 +903,19 @@ def generate_vgm(gen_func, state0, state1, text, model_name0, model_name1, request: gr.Request):
     for i, state in enumerate([state0, state1]):
         output_file = f'{VIDEO_DIR}/generation/{state.conv_id}.mp4'
         os.makedirs(os.path.dirname(output_file), exist_ok=True)
-        r = requests.get(state.output)
-        with open(output_file, 'wb') as outfile:
-            outfile.write(r.content)
+        print(state.model_name)
+
+        if state.model_name.startswith('fal'):
+            r = requests.get(state.output)
+            with open(output_file, 'wb') as outfile:
+                outfile.write(r.content)
+        else:
+            print("======== video shape: ========")
+            print(state.output)
+            print(state.output.shape)
+            imageio.mimwrite(output_file, state.output, fps=8, quality=9)
         save_video_file_on_log_server(output_file)
+    yield state0, state1, f'{VIDEO_DIR}/generation/{state0.conv_id}.mp4', f'{VIDEO_DIR}/generation/{state1.conv_id}.mp4'
 
 
 def generate_vgm_annoy(gen_func, state0, state1, text, model_name0, model_name1, request: gr.Request):
@@ -909,8 +938,8 @@ def generate_vgm_annoy(gen_func, state0, state1, text, model_name0, model_name1, request: gr.Request):
     state0.model_name = model_name0
     state1.model_name = model_name1
 
-    yield state0, state1, generated_video0, generated_video1, \
-        gr.Markdown(f"### Model A: {model_name0}"), gr.Markdown(f"### Model B: {model_name1}")
+    # yield state0, state1, generated_video0, generated_video1, \
+    #     gr.Markdown(f"### Model A: {model_name0}"), gr.Markdown(f"### Model B: {model_name1}")
 
     finish_tstamp = time.time()
     # logger.info(f"===output===: {output}")
@@ -944,7 +973,15 @@ def generate_vgm_annoy(gen_func, state0, state1, text, model_name0, model_name1, request: gr.Request):
     for i, state in enumerate([state0, state1]):
         output_file = f'{VIDEO_DIR}/generation/{state.conv_id}.mp4'
         os.makedirs(os.path.dirname(output_file), exist_ok=True)
-        r = requests.get(state.output)
-        with open(output_file, 'wb') as outfile:
-            outfile.write(r.content)
-        save_video_file_on_log_server(output_file)
+        if state.model_name.startswith('fal'):
+            r = requests.get(state.output)
+            with open(output_file, 'wb') as outfile:
+                outfile.write(r.content)
+        else:
+            print("======== video shape: ========")
+            print(state.output.shape)
+            imageio.mimwrite(output_file, state.output, fps=8, quality=9)
+        save_video_file_on_log_server(output_file)
+
+    yield state0, state1, f'{VIDEO_DIR}/generation/{state0.conv_id}.mp4', f'{VIDEO_DIR}/generation/{state1.conv_id}.mp4', \
+        gr.Markdown(f"### Model A: {model_name0}"), gr.Markdown(f"### Model B: {model_name1}")