rynmurdock committed on
Commit 67e8481 (0 parents)
Files changed (6)
  1. README.md +3 -0
  2. app.py +504 -0
  3. license +4 -0
  4. lightning_app.py +452 -0
  5. safety_checker_improved.py +45 -0
  6. twitter_prompts.csv +72 -0
README.md ADDED
@@ -0,0 +1,3 @@
1
+ # Blue Tigers
2
+
3
+ Zahir with movement.
app.py ADDED
@@ -0,0 +1,504 @@
1
+
2
+
3
+
4
+ # TODO save & restart from (if it exists) dataframe parquet
5
+ import torch
6
+
7
+ # lol
8
+ DEVICE = 'cuda'
9
+ STEPS = 6
10
+ output_hidden_state = False
11
+ device = "cuda"
12
+ dtype = torch.float16
13
+
14
+ import matplotlib.pyplot as plt
15
+ import matplotlib
16
+
17
+ from sklearn.linear_model import Ridge
18
+ from sfast.compilers.diffusion_pipeline_compiler import (compile, compile_unet,
19
+ CompilationConfig)
20
+ config = CompilationConfig.Default()
21
+
22
+ try:
23
+ import triton
24
+ config.enable_triton = True
25
+ except ImportError:
26
+ print('Triton not installed, skip')
27
+ config.enable_cuda_graph = True
28
+ config.enable_jit = True
29
+ config.enable_jit_freeze = True
30
+ config.enable_cnn_optimization = True
31
+ config.preserve_parameters = False
32
+ config.prefer_lowp_gemm = True
33
+
34
+ import imageio
35
+ import gradio as gr
36
+ import numpy as np
37
+ from sklearn.svm import SVC
38
+ from sklearn.inspection import permutation_importance
39
+ from sklearn import preprocessing
40
+ import pandas as pd
41
+ from apscheduler.schedulers.background import BackgroundScheduler
42
+
43
+ import os  # needed for os.path.isfile / os.remove below
+ import random
44
+ import time
45
+ from PIL import Image
46
+ from safety_checker_improved import maybe_nsfw
47
+
48
+
49
+ torch.set_grad_enabled(False)
50
+ torch.backends.cuda.matmul.allow_tf32 = True
51
+ torch.backends.cudnn.allow_tf32 = True
52
+
53
+ prevs_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'latest_user_to_rate'])
54
+
55
+ import spaces
56
+ prompt_list = [p for p in list(set(
57
+ pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
58
+
59
+ start_time = time.time()
60
+
61
+ ####################### Setup Model
62
+ from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler, LCMScheduler, AutoencoderTiny, UNet2DConditionModel, AutoencoderKL
63
+ from transformers import CLIPTextModel
64
+ from huggingface_hub import hf_hub_download
65
+ from safetensors.torch import load_file
66
+ from PIL import Image
67
+ from transformers import CLIPVisionModelWithProjection
68
+ import uuid
69
+ import av
70
+
71
+ def write_video(file_name, images, fps=17):
72
+ print('Saving')
73
+ container = av.open(file_name, mode="w")
74
+
75
+ stream = container.add_stream("h264", rate=fps)
76
+ # stream.options = {'preset': 'faster'}
77
+ stream.thread_count = 0
78
+ stream.width = 512
79
+ stream.height = 512
80
+ stream.pix_fmt = "yuv420p"
81
+
82
+ for img in images:
83
+ img = np.array(img)
84
+ img = np.round(img).astype(np.uint8)
85
+ frame = av.VideoFrame.from_ndarray(img, format="rgb24")
86
+ for packet in stream.encode(frame):
87
+ container.mux(packet)
88
+ # Flush stream
89
+ for packet in stream.encode():
90
+ container.mux(packet)
91
+ # Close the file
92
+ container.close()
93
+ print('Saved')
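Because `stream.width` and `stream.height` are hard-coded to 512, `write_video` expects 512x512 RGB uint8 frames; a quick usage sketch with dummy data:

import numpy as np

frames = [np.full((512, 512, 3), 128, dtype=np.uint8) for _ in range(16)]  # flat grey frames
write_video('/tmp/example.mp4', frames, fps=17)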
94
+
95
+
96
+ image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="sdxl_models/image_encoder", torch_dtype=dtype).to(DEVICE)
97
+ #vae = AutoencoderTiny.from_pretrained("madebyollin/taesd", torch_dtype=dtype)
98
+
99
+ # vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=dtype)
100
+ # vae = compile_unet(vae, config=config)
101
+
102
+ #finetune_path = '''/home/ryn_mote/Misc/finetune-sd1.5/dreambooth-model best'''''
103
+ #unet = UNet2DConditionModel.from_pretrained(finetune_path+'/unet/').to(dtype)
104
+ #text_encoder = CLIPTextModel.from_pretrained(finetune_path+'/text_encoder/').to(dtype)
105
+
106
+
107
+ unet = UNet2DConditionModel.from_pretrained('rynmurdock/Sea_Claws', subfolder='unet').to(dtype)
108
+ text_encoder = CLIPTextModel.from_pretrained('rynmurdock/Sea_Claws', subfolder='text_encoder').to(dtype)
109
+
110
+ adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")
111
+ pipe = AnimateDiffPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", motion_adapter=adapter, image_encoder=image_encoder, torch_dtype=dtype, unet=unet, text_encoder=text_encoder)
112
+ pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
113
+ pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora",)
114
+ pipe.set_adapters(["lcm-lora"], [.9])
115
+ pipe.fuse_lora()
116
+
117
+ #pipe = AnimateDiffPipeline.from_pretrained('emilianJR/epiCRealism', torch_dtype=dtype, image_encoder=image_encoder)
118
+ #pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
119
+ #repo = "ByteDance/AnimateDiff-Lightning"
120
+ #ckpt = f"animatediff_lightning_4step_diffusers.safetensors"
121
+
122
+
123
+ pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15_vit-G.bin", map_location='cpu')
124
+ # This IP adapter improves outputs substantially.
125
+ pipe.set_ip_adapter_scale(.8)
126
+ pipe.unet.fuse_qkv_projections()
127
+ #pipe.enable_free_init(method="gaussian", use_fast_sampling=True)
128
+
129
+ #pipe = compile(pipe, config=config)
130
+ pipe.to(device=DEVICE)
131
+ #pipe.unet = torch.compile(pipe.unet)
132
+ #pipe.vae = torch.compile(pipe.vae)
133
+
134
+
135
+ im_embs = torch.zeros(1, 1, 1, 1280, device=DEVICE, dtype=dtype)
136
+ output = pipe(prompt='a person', guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[im_embs], num_inference_steps=STEPS)
137
+ leave_im_emb, _ = pipe.encode_image(
138
+ output.frames[0][len(output.frames[0])//2], DEVICE, 1, output_hidden_state
139
+ )
140
+ assert len(output.frames[0]) == 16
141
+ leave_im_emb = leave_im_emb.detach().to('cpu')  # .to() is not in-place; keep the CPU copy
142
+
143
+
144
+ @spaces.GPU()
145
+ def generate(in_im_embs):
146
+ in_im_embs = in_im_embs.to('cuda').unsqueeze(0).unsqueeze(0)
147
+ #im_embs = torch.cat((torch.zeros(1, 1280, device=DEVICE, dtype=dtype), in_im_embs), 0)
148
+
149
+ output = pipe(prompt='a scene', guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[in_im_embs], num_inference_steps=STEPS)
150
+
151
+ im_emb, _ = pipe.encode_image(
152
+ output.frames[0][len(output.frames[0])//2], DEVICE, 1, output_hidden_state
153
+ )
154
+ im_emb = im_emb.detach().to('cpu')
155
+
156
+ nsfw = maybe_nsfw(output.frames[0][len(output.frames[0])//2])
157
+
158
+ name = str(uuid.uuid4()).replace("-", "")
159
+ path = f"/tmp/{name}.mp4"
160
+
161
+ if nsfw:
162
+ gr.Warning("NSFW content detected.")
163
+ # TODO could return an automatic dislike or auto dislike on the backend for neither as well; just would need refactoring.
164
+ return None, im_emb
165
+
166
+
167
+ output.frames[0] = output.frames[0] + list(reversed(output.frames[0]))
168
+
169
+ write_video(path, output.frames[0])
170
+ return path, im_emb
171
+
172
+
173
+ #######################
174
+
175
+ # TODO add to state instead of shared across all
176
+ glob_idx = 0
177
+
178
+ # TODO
179
+ # We can keep a df of media paths, embeddings, and user ratings.
180
+ # We can drop by lowest user ratings to keep enough RAM available when we get too many rows.
181
+ # We can continuously update by who is most recently active in the background & server as we go, plucking using "has been seen" and similarity
182
+ # to user embeds
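One way the "drop by lowest user ratings" idea above could look, as a hedged sketch that is not wired into the app; the 400-row cap and the neutral 0.5 score for unrated rows are made-up choices:

def prune_low_rated(df, max_rows=400):
    # keep only the best-rated rows once the table grows past max_rows
    if len(df) <= max_rows:
        return df
    def mean_rating(d):
        # ratings are stored as {user_id: 0 or 1}; the placeholder {' ': ' '} has no int values
        vals = [v for v in d.values() if isinstance(v, int)]
        return sum(vals) / len(vals) if vals else .5
    df = df.reset_index(drop=True)
    scores = df['user:rating'].apply(mean_rating)
    keep = scores.sort_values(ascending=False).index[:max_rows]
    return df.loc[sorted(keep)]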
183
+
184
+ def get_user_emb(embs, ys):
185
+ # handle case where every instance of calibration videos is 'Neither' or 'Like' or 'Dislike'
186
+ if len(list(set(ys))) <= 1:
187
+ embs.append(.01*torch.randn(1280))
188
+ embs.append(.01*torch.randn(1280))
189
+ ys.append(0)
190
+ ys.append(1)
191
+ print('Fixing only one feedback class available.\n')
192
+
193
+ indices = list(range(len(embs)))
194
+ # sample only as many negatives as there are positives
195
+ pos_indices = [i for i in indices if ys[i] == 1]
196
+ neg_indices = [i for i in indices if ys[i] == 0]
197
+ #lower = min(len(pos_indices), len(neg_indices))
198
+ #neg_indices = random.sample(neg_indices, lower)
199
+ #pos_indices = random.sample(pos_indices, lower)
200
+ print(len(neg_indices), len(pos_indices))
201
+
202
+
203
+ # we may have just encountered a rare multi-threading diffusers issue (https://github.com/huggingface/diffusers/issues/5749);
204
+ # this ends up adding a rating but losing an embedding, it seems.
205
+ # let's take off a rating if so to continue without indexing errors.
206
+ if len(ys) > len(embs):
207
+ print('ys are longer than embs; popping latest rating')
208
+ ys.pop(-1)
209
+
210
+ feature_embs = np.array(torch.stack([embs[i].squeeze().to('cpu') for i in indices] + [leave_im_emb.to('cpu').squeeze()]).to('cpu'))
211
+ #scaler = preprocessing.StandardScaler().fit(feature_embs)
212
+ #feature_embs = scaler.transform(feature_embs)
213
+ chosen_y = np.array([ys[i] for i in indices] + [0])
214
+
215
+ print('Gathering coefficients')
216
+ #lin_class = Ridge(fit_intercept=False).fit(feature_embs, chosen_y)
217
+ lin_class = SVC(max_iter=50000, kernel='linear', C=.1, class_weight='balanced').fit(feature_embs, chosen_y)
218
+ coef_ = torch.tensor(lin_class.coef_, dtype=torch.double).detach().to('cpu')
219
+ coef_ = coef_ / coef_.abs().max() * 3
220
+ print('Gathered')
221
+
222
+ w = 1# if len(embs) % 2 == 0 else 0
223
+ im_emb = w * coef_.to(dtype=dtype)
224
+ return im_emb
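A self-contained toy illustration (not project code) of what `get_user_emb` is doing: fit a linear SVM on liked vs. disliked embeddings and use its scaled weight vector as a preference direction for ranking candidates by cosine similarity:

import numpy as np
import torch
from sklearn.svm import SVC

rng = np.random.default_rng(0)
embs = rng.normal(size=(8, 1280)).astype(np.float32)   # stand-in "image embeddings"
ys = np.array([1, 1, 1, 0, 0, 0, 1, 0])                # 1 = liked, 0 = disliked

clf = SVC(kernel='linear', class_weight='balanced').fit(embs, ys)
direction = torch.tensor(clf.coef_).float()
direction = direction / direction.abs().max() * 3      # same scaling as above

candidates = torch.tensor(rng.normal(size=(5, 1280)).astype(np.float32))
sims = torch.cosine_similarity(candidates, direction)  # rank unseen candidates
print(sims.argsort(descending=True))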
225
+
226
+
227
+ def pluck_img(user_id, user_emb):
228
+ print(user_id, 'user_id')
229
+ not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) == None for i in prevs_df.iterrows()]]
230
+ rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) != None for i in prevs_df.iterrows()]]
231
+ while len(not_rated_rows) == 0:
232
+ not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) == None for i in prevs_df.iterrows()]]
233
+ time.sleep(.01)
234
+ # TODO optimize this lol
235
+ best_sim = -100000
236
+ for i in not_rated_rows.iterrows():
237
+ # TODO sloppy .to but it is 3am.
238
+ sim = torch.cosine_similarity(i[1]['embeddings'].detach().to('cpu'), user_emb.detach().to('cpu'))
239
+ if sim > best_sim:
240
+ best_sim = sim
241
+ best_row = i[1]
242
+ img = best_row['paths']
243
+ return img
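The "TODO optimize this lol" above can be addressed by stacking the candidate embeddings and scoring them in one call; a hedged sketch (editor's illustration) that leaves the original waiting loop aside:

def pluck_img_vectorized(user_id, user_emb):
    not_rated = prevs_df[[row['user:rating'].get(user_id, None) is None for _, row in prevs_df.iterrows()]]
    stacked = torch.cat([e.detach().to('cpu').reshape(1, -1) for e in not_rated['embeddings']])
    sims = torch.cosine_similarity(stacked, user_emb.detach().to('cpu').reshape(1, -1))
    return not_rated.iloc[int(sims.argmax())]['paths']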
244
+
245
+
246
+ def background_next_image():
247
+ global prevs_df
248
+
249
+ # only let it get N (maybe 3) ahead of the user
250
+ not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
251
+ rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
252
+ while len(not_rated_rows) > 8 or len(rated_rows) < 4:
253
+ not_rated_rows = prevs_df[[i[1]['user:rating'] == {' ': ' '} for i in prevs_df.iterrows()]]
254
+ rated_rows = prevs_df[[i[1]['user:rating'] != {' ': ' '} for i in prevs_df.iterrows()]]
255
+ time.sleep(.01)
256
+
257
+ print(rated_rows['latest_user_to_rate'])
258
+ latest_user_id = rated_rows.iloc[-1]['latest_user_to_rate']
259
+ rated_rows = prevs_df[[i[1]['user:rating'].get(latest_user_id, None) is not None for i in prevs_df.iterrows()]]
260
+
261
+ print(latest_user_id)
262
+ embs, ys = pluck_embs_ys(latest_user_id)
263
+
264
+ user_emb = get_user_emb(embs, ys)
265
+ img, embs = generate(user_emb)
266
+ tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating', 'latest_user_to_rate'])
267
+ tmp_df['paths'] = [img]
268
+ tmp_df['embeddings'] = [embs]
269
+ tmp_df['user:rating'] = [{' ': ' '}]
270
+ prevs_df = pd.concat((prevs_df, tmp_df))
271
+ # we can free up storage by deleting the image
272
+ if len(prevs_df) > 50:
273
+ oldest_path = prevs_df.iloc[0]['paths']
274
+ if os.path.isfile(oldest_path):
275
+ os.remove(oldest_path)
276
+ else:
277
+ # If it fails, inform the user.
278
+ print("Error: %s file not found" % oldest_path)
279
+ # only keep 50 images & embeddings & ips, then remove oldest
280
+ prevs_df = prevs_df.iloc[1:]
281
+
282
+
283
+ def pluck_embs_ys(user_id):
284
+ rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) != None for i in prevs_df.iterrows()]]
285
+ not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) == None for i in prevs_df.iterrows()]]
286
+ while len(not_rated_rows) == 0:
287
+ not_rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) == None for i in prevs_df.iterrows()]]
288
+ rated_rows = prevs_df[[i[1]['user:rating'].get(user_id, None) != None for i in prevs_df.iterrows()]]
289
+ time.sleep(.01)
290
+
291
+ embs = rated_rows['embeddings'].to_list()
292
+ ys = [i[user_id] for i in rated_rows['user:rating'].to_list()]
293
+ print('embs', 'ys', embs, ys)
294
+ return embs, ys
295
+
296
+ def next_image(calibrate_prompts, user_id):
297
+ global glob_idx
298
+ glob_idx = glob_idx + 1
299
+
300
+ with torch.no_grad():
301
+ if len(calibrate_prompts) > 0:
302
+ print('######### Calibrating with sample media #########')
303
+ cal_video = calibrate_prompts.pop(0)
304
+ image = prevs_df[prevs_df['paths'] == cal_video]['paths'].to_list()[0]
305
+
306
+ return image, calibrate_prompts
307
+ else:
308
+ print('######### Roaming #########')
309
+ embs, ys = pluck_embs_ys(user_id)
310
+ user_emb = get_user_emb(embs, ys)
311
+ image = pluck_img(user_id, user_emb)
312
+ return image, calibrate_prompts
313
+
314
+
315
+
316
+
317
+
318
+
319
+
320
+
321
+
322
+ def start(_, calibrate_prompts, user_id, request: gr.Request):
323
+ image, calibrate_prompts = next_image(calibrate_prompts, user_id)
324
+ return [
325
+ gr.Button(value='Like (L)', interactive=True),
326
+ gr.Button(value='Neither (Space)', interactive=True),
327
+ gr.Button(value='Dislike (A)', interactive=True),
328
+ gr.Button(value='Start', interactive=False),
329
+ image,
330
+ calibrate_prompts
331
+ ]
332
+
333
+
334
+ def choose(img, choice, calibrate_prompts, user_id, request: gr.Request):
335
+ global prevs_df
336
+
337
+
338
+ if choice == 'Like (L)':
339
+ choice = 1
340
+ elif choice == 'Neither (Space)':
341
+ img, calibrate_prompts = next_image(calibrate_prompts, user_id)
342
+ return img, calibrate_prompts
343
+ else:
344
+ choice = 0
345
+
346
+ # if we detected NSFW, leave that area of latent space regardless of how they rated chosen.
347
+ # TODO skip allowing rating & just continue
348
+ if img == None:
349
+ print('NSFW -- choice is disliked')
350
+ choice = 0
351
+
352
+ # TODO clean up
353
+ old_d = prevs_df.loc[[p.split('/')[-1] in img for p in prevs_df['paths'].to_list()], 'user:rating'][0]
354
+ old_d[user_id] = choice
355
+ prevs_df.loc[[p.split('/')[-1] in img for p in prevs_df['paths'].to_list()], 'user:rating'][0] = old_d
356
+ prevs_df.loc[[p.split('/')[-1] in img for p in prevs_df['paths'].to_list()], 'latest_user_to_rate'] = [user_id]
357
+ print('full_df, prevs_df', prevs_df, prevs_df['latest_user_to_rate'])
358
+
359
+ img, calibrate_prompts = next_image(calibrate_prompts, user_id)
360
+ return img, calibrate_prompts
361
+
362
+ css = '''.gradio-container{max-width: 700px !important}
363
+ #description{text-align: center}
364
+ #description h1, #description h3{display: block}
365
+ #description p{margin-top: 0}
366
+ .fade-in-out {animation: fadeInOut 3s forwards}
367
+ @keyframes fadeInOut {
368
+ 0% {
369
+ background: var(--bg-color);
370
+ }
371
+ 100% {
372
+ background: var(--button-secondary-background-fill);
373
+ }
374
+ }
375
+ '''
376
+ js_head = '''
377
+ <script>
378
+ document.addEventListener('keydown', function(event) {
379
+ if (event.key === 'a' || event.key === 'A') {
380
+ // Trigger click on 'dislike' if 'A' is pressed
381
+ document.getElementById('dislike').click();
382
+ } else if (event.key === ' ' || event.keyCode === 32) {
383
+ // Trigger click on 'neither' if Spacebar is pressed
384
+ document.getElementById('neither').click();
385
+ } else if (event.key === 'l' || event.key === 'L') {
386
+ // Trigger click on 'like' if 'L' is pressed
387
+ document.getElementById('like').click();
388
+ }
389
+ });
390
+ function fadeInOut(button, color) {
391
+ button.style.setProperty('--bg-color', color);
392
+ button.classList.remove('fade-in-out');
393
+ void button.offsetWidth; // This line forces a repaint by accessing a DOM property
394
+
395
+ button.classList.add('fade-in-out');
396
+ button.addEventListener('animationend', () => {
397
+ button.classList.remove('fade-in-out'); // Reset the animation state
398
+ }, {once: true});
399
+ }
400
+ document.body.addEventListener('click', function(event) {
401
+ const target = event.target;
402
+ if (target.id === 'dislike') {
403
+ fadeInOut(target, '#ff1717');
404
+ } else if (target.id === 'like') {
405
+ fadeInOut(target, '#006500');
406
+ } else if (target.id === 'neither') {
407
+ fadeInOut(target, '#cccccc');
408
+ }
409
+ });
410
+
411
+ </script>
412
+ '''
413
+
414
+ with gr.Blocks(css=css, head=js_head) as demo:
415
+ gr.Markdown('''# Blue Tigers
416
+ ### Generative Recommenders for Exploration of Video
417
+
418
+ Explore the latent space based on your preferences, without text prompts. Learn more in [the write-up](https://rynmurdock.github.io/posts/2024/3/generative_recomenders/).
419
+ ''', elem_id="description")
420
+ user_id = gr.State(int(torch.randint(2**6, (1,))[0]))
421
+ calibrate_prompts = gr.State([
422
+ './first.mp4',
423
+ './second.mp4',
424
+ './third.mp4',
425
+ './fourth.mp4',
426
+ './fifth.mp4',
427
+ './sixth.mp4',
428
+ './seventh.mp4',
429
+ ])
430
+ def l():
431
+ return None
432
+
433
+ with gr.Row(elem_id='output-image'):
434
+ img = gr.Video(
435
+ label='Lightning',
436
+ autoplay=True,
437
+ interactive=False,
438
+ height=512,
439
+ width=512,
440
+ include_audio=False,
441
+ elem_id="video_output"
442
+ )
443
+ img.play(l, js='''document.querySelector('[data-testid="Lightning-player"]').loop = true''')
444
+ with gr.Row(equal_height=True):
445
+ b3 = gr.Button(value='Dislike (A)', interactive=False, elem_id="dislike")
446
+ b2 = gr.Button(value='Neither (Space)', interactive=False, elem_id="neither")
447
+ b1 = gr.Button(value='Like (L)', interactive=False, elem_id="like")
448
+ b1.click(
449
+ choose,
450
+ [img, b1, calibrate_prompts, user_id],
451
+ [img, calibrate_prompts],
452
+ )
453
+ b2.click(
454
+ choose,
455
+ [img, b2, calibrate_prompts, user_id],
456
+ [img, calibrate_prompts],
457
+ )
458
+ b3.click(
459
+ choose,
460
+ [img, b3, calibrate_prompts, user_id],
461
+ [img, calibrate_prompts],
462
+ )
463
+ with gr.Row():
464
+ b4 = gr.Button(value='Start')
465
+ b4.click(start,
466
+ [b4, calibrate_prompts, user_id],
467
+ [b1, b2, b3, b4, img, calibrate_prompts]
468
+ )
469
+ with gr.Row():
470
+ html = gr.HTML('''<div style='text-align:center; font-size:20px'>You will calibrate for several videos and then roam.</div><br><br><br>
471
+ <div style='text-align:center; font-size:14px'>Note that while the AnimateLCM model with NSFW filtering is unlikely to produce NSFW images, this may still occur, and users should avoid NSFW content when rating.
472
+ </div>
473
+ <br><br>
474
+ <div style='text-align:center; font-size:14px'>Thanks to @multimodalart for their contributions to the demo, esp. the interface and @maxbittker for feedback.
475
+ </div>''')
476
+
477
+ scheduler = BackgroundScheduler()
478
+ scheduler.add_job(func=background_next_image, trigger="interval", seconds=1)
479
+ scheduler.start()
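Not in the commit, but a common companion to this APScheduler setup is shutting the scheduler down on exit so the one-second interval job doesn't outlive the app; a small hedged addition:

import atexit
atexit.register(lambda: scheduler.shutdown(wait=False))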
480
+
481
+ # prep our calibration prompts
482
+ for im in [
483
+ './first.mp4',
484
+ './second.mp4',
485
+ './third.mp4',
486
+ './fourth.mp4',
487
+ './fifth.mp4',
488
+ './sixth.mp4',
489
+ './seventh.mp4',
490
+ ]:
491
+ tmp_df = pd.DataFrame(columns=['paths', 'embeddings', 'ips', 'user:rating'])
492
+ tmp_df['paths'] = [im]
493
+ image = list(imageio.imiter(im))
494
+ image = image[len(image)//2]
495
+ im_emb, _ = pipe.encode_image(
496
+ image, DEVICE, 1, output_hidden_state
497
+ )
498
+
499
+ tmp_df['embeddings'] = [im_emb.detach().to('cpu')]
500
+ tmp_df['user:rating'] = [{' ': ' '}]
501
+ prevs_df = pd.concat((prevs_df, tmp_df))
502
+
503
+
504
+ demo.launch(share=True)
license ADDED
@@ -0,0 +1,4 @@
1
+ You may use this as you please iff you:
2
+ do not hold the authors liable for any issues you may encounter;
3
+ provide attribution by prominently linking to https://rynmurdock.github.io/posts/2024/3/generative_recomenders/ if you redistribute this code or use it within a product;
4
+ include the word "Tiger" within the name of any products downstream of this.
lightning_app.py ADDED
@@ -0,0 +1,452 @@
1
+
2
+ import torch
3
+
4
+ # lol
5
+ sidel = 512
6
+ DEVICE = 'cuda'
7
+ STEPS = 4
8
+ output_hidden_state = False
9
+ device = "cuda"
10
+ dtype = torch.float16
11
+
12
+ import matplotlib.pyplot as plt
13
+ import matplotlib
14
+ matplotlib.use('TkAgg')
15
+
16
+ from sklearn.linear_model import LinearRegression
17
+ from sfast.compilers.diffusion_pipeline_compiler import (compile, compile_unet,
18
+ CompilationConfig)
19
+ config = CompilationConfig.Default()
20
+
21
+ try:
22
+ import triton
23
+ config.enable_triton = True
24
+ except ImportError:
25
+ print('Triton not installed, skip')
26
+ config.enable_cuda_graph = True
27
+
28
+ config.enable_jit = True
29
+ config.enable_jit_freeze = True
30
+
31
+ config.enable_cnn_optimization = True
32
+ config.preserve_parameters = False
33
+ config.prefer_lowp_gemm = True
34
+
35
+ import imageio
36
+ import gradio as gr
37
+ import numpy as np
38
+ from sklearn.svm import SVC
39
+ from sklearn.inspection import permutation_importance
40
+ from sklearn import preprocessing
41
+ import pandas as pd
42
+
43
+ import random
44
+ import time
45
+ from PIL import Image
46
+ from safety_checker_improved import maybe_nsfw
47
+
48
+
49
+ torch.set_grad_enabled(False)
50
+ torch.backends.cuda.matmul.allow_tf32 = True
51
+ torch.backends.cudnn.allow_tf32 = True
52
+
53
+ # TODO put back?
54
+ # import spaces
55
+
56
+ prompt_list = [p for p in list(set(
57
+ pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
58
+
59
+ start_time = time.time()
60
+
61
+ ####################### Setup Model
62
+ from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler, LCMScheduler, ConsistencyDecoderVAE, AutoencoderTiny
63
+ from hyper_tile import split_attention, flush
64
+ from huggingface_hub import hf_hub_download
65
+ from safetensors.torch import load_file
66
+ from PIL import Image
67
+ from transformers import CLIPVisionModelWithProjection
68
+ import uuid
69
+ import av
70
+
71
+ def write_video(file_name, images, fps=10):
72
+ print('Saving')
73
+ container = av.open(file_name, mode="w")
74
+
75
+ stream = container.add_stream("h264", rate=fps)
76
+ stream.width = sidel
77
+ stream.height = sidel
78
+ stream.pix_fmt = "yuv420p"
79
+
80
+ for img in images:
81
+ img = np.array(img)
82
+ img = np.round(img).astype(np.uint8)
83
+ frame = av.VideoFrame.from_ndarray(img, format="rgb24")
84
+ for packet in stream.encode(frame):
85
+ container.mux(packet)
86
+ # Flush stream
87
+ for packet in stream.encode():
88
+ container.mux(packet)
89
+ # Close the file
90
+ container.close()
91
+ print('Saved')
92
+
93
+ bases = {
94
+ #"basem": "emilianJR/epiCRealism"
95
+ #SG161222/Realistic_Vision_V6.0_B1_noVAE
96
+ #runwayml/stable-diffusion-v1-5
97
+ #frankjoshua/realisticVisionV51_v51VAE
98
+ #Lykon/dreamshaper-7
99
+ }
100
+
101
+ image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=dtype).to(DEVICE)
102
+ vae = AutoencoderTiny.from_pretrained("madebyollin/taesd", torch_dtype=dtype)
103
+
104
+ # vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=dtype)
105
+ # vae = compile_unet(vae, config=config)
106
+
107
+ #adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")
108
+ #pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=adapter, image_encoder=image_encoder, torch_dtype=dtype)
109
+ #pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
110
+ #pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora",)
111
+ #pipe.set_adapters(["lcm-lora"], [1])
112
+ #pipe.fuse_lora()
113
+
114
+ pipe = AnimateDiffPipeline.from_pretrained('emilianJR/epiCRealism', torch_dtype=dtype, image_encoder=image_encoder, vae=vae)
115
+ pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
116
+ repo = "ByteDance/AnimateDiff-Lightning"
117
+ ckpt = f"animatediff_lightning_4step_diffusers.safetensors"
118
+ pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device='cpu'), strict=False)
119
+
120
+
121
+ pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin", map_location='cpu')
122
+ pipe.set_ip_adapter_scale(.8)
123
+ # pipe.unet.fuse_qkv_projections()
124
+ #pipe.enable_free_init(method="gaussian", use_fast_sampling=True)
125
+
126
+ pipe = compile(pipe, config=config)
127
+ pipe.to(device=DEVICE)
128
+
129
+
130
+ # THIS WOULD NEED PATCHING TODO
131
+ with split_attention(pipe.vae, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
132
+ # ! Change the tile_size and disable to see their effects
133
+ with split_attention(pipe.unet, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
134
+ im_embs = torch.zeros(1, 1, 1, 1024, device=DEVICE, dtype=dtype)
135
+ output = pipe(prompt='a person', guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[im_embs], num_inference_steps=STEPS)
136
+ leave_im_emb, _ = pipe.encode_image(
137
+ output.frames[0][len(output.frames[0])//2], DEVICE, 1, output_hidden_state
138
+ )
139
+ assert len(output.frames[0]) == 16
140
+ leave_im_emb = leave_im_emb.to('cpu')  # .to() is not in-place; keep the CPU copy
141
+
142
+
143
+ # TODO put back
144
+ # @spaces.GPU()
145
+ def generate(prompt, in_im_embs=None, base='basem'):
146
+
147
+ if in_im_embs == None:
148
+ in_im_embs = torch.zeros(1, 1, 1, 1024, device=DEVICE, dtype=dtype)
149
+ #in_im_embs = in_im_embs / torch.norm(in_im_embs)
150
+ else:
151
+ in_im_embs = in_im_embs.to('cuda').unsqueeze(0).unsqueeze(0)
152
+ #im_embs = torch.cat((torch.zeros(1, 1024, device=DEVICE, dtype=dtype), in_im_embs), 0)
153
+
154
+ with split_attention(pipe.unet, tile_size=128, swap_size=2, disable=False, aspect_ratio=1):
155
+ # ! Change the tile_size and disable to see their effects
156
+ with split_attention(pipe.vae, tile_size=128, disable=False, aspect_ratio=1):
157
+ output = pipe(prompt=prompt, guidance_scale=0, added_cond_kwargs={}, ip_adapter_image_embeds=[in_im_embs], num_inference_steps=STEPS)
158
+
159
+ im_emb, _ = pipe.encode_image(
160
+ output.frames[0][len(output.frames[0])//2], DEVICE, 1, output_hidden_state
161
+ )
162
+
163
+ nsfw = maybe_nsfw(output.frames[0][len(output.frames[0])//2])
164
+
165
+ name = str(uuid.uuid4()).replace("-", "")
166
+ path = f"/tmp/{name}.mp4"
167
+
168
+ if nsfw:
169
+ gr.Warning("NSFW content detected.")
170
+ # TODO could return an automatic dislike or auto dislike on the backend for neither as well; just would need refactoring.
171
+ return None, im_emb
172
+
173
+ plt.close('all')
174
+ plt.hist(np.array(im_emb.to('cpu')).flatten(), bins=5)
175
+ plt.savefig('real_im_emb_plot.jpg')
176
+
177
+ write_video(path, output.frames[0])
178
+ return path, im_emb.to('cpu')
179
+
180
+
181
+ #######################
182
+
183
+ # TODO add to state instead of shared across all
184
+ glob_idx = 0
185
+
186
+ def next_image(embs, ys, calibrate_prompts):
187
+ global glob_idx
188
+ glob_idx = glob_idx + 1
189
+
190
+ with torch.no_grad():
191
+ if len(calibrate_prompts) > 0:
192
+ print('######### Calibrating with sample prompts #########')
193
+ prompt = calibrate_prompts.pop(0)
194
+ print(prompt)
195
+ image, img_embs = generate(prompt)
196
+ embs += img_embs
197
+ print(len(embs))
198
+ return image, embs, ys, calibrate_prompts
199
+ else:
200
+ print('######### Roaming #########')
201
+
202
+ # sample a .8 of rated embeddings for some stochasticity, or at least two embeddings.
203
+ # could take a sample < len(embs)
204
+ #n_to_choose = max(int((len(embs))), 2)
205
+ #indices = random.sample(range(len(embs)), n_to_choose)
206
+
207
+ # sample only as many negatives as there are positives
208
+ #pos_indices = [i for i in indices if ys[i] == 1]
209
+ #neg_indices = [i for i in indices if ys[i] == 0]
210
+ #lower = min(len(pos_indices), len(neg_indices))
211
+ #neg_indices = random.sample(neg_indices, lower)
212
+ #pos_indices = random.sample(pos_indices, lower)
213
+ #indices = neg_indices + pos_indices
214
+
215
+ pos_indices = [i for i in range(len(embs)) if ys[i] == 1]
216
+ neg_indices = [i for i in range(len(embs)) if ys[i] == 0]
217
+
218
+ # the embs & ys stay tied by index but we shuffle to drop randomly
219
+ random.shuffle(pos_indices)
220
+ random.shuffle(neg_indices)
221
+
222
+ #if len(pos_indices) - len(neg_indices) > 48 and len(pos_indices) > 80:
223
+ # pos_indices = pos_indices[32:]
224
+ if len(neg_indices) - len(pos_indices) > 48/16 and len(pos_indices) > 120/16:
225
+ pos_indices = pos_indices[1:]
226
+ if len(neg_indices) - len(pos_indices) > 48/16 and len(neg_indices) > 200/16:
227
+ neg_indices = neg_indices[2:]
228
+
229
+
230
+ print(len(pos_indices), len(neg_indices))
231
+ indices = pos_indices + neg_indices
232
+
233
+ embs = [embs[i] for i in indices]
234
+ ys = [ys[i] for i in indices]
235
+ indices = list(range(len(embs)))
236
+
237
+
238
+ # handle case where every instance of calibration prompts is 'Neither' or 'Like' or 'Dislike'
239
+ if len(list(set(ys))) <= 1:
240
+ embs.append(.01*torch.randn(1024))
241
+ embs.append(.01*torch.randn(1024))
242
+ ys.append(0)
243
+ ys.append(1)
244
+
245
+
246
+ # also add the latest 0 and the latest 1
247
+ has_0 = False
248
+ has_1 = False
249
+ for i in reversed(range(len(ys))):
250
+ if ys[i] == 0 and has_0 == False:
251
+ indices.append(i)
252
+ has_0 = True
253
+ elif ys[i] == 1 and has_1 == False:
254
+ indices.append(i)
255
+ has_1 = True
256
+ if has_0 and has_1:
257
+ break
258
+
259
+ # we may have just encountered a rare multi-threading diffusers issue (https://github.com/huggingface/diffusers/issues/5749);
260
+ # this ends up adding a rating but losing an embedding, it seems.
261
+ # let's take off a rating if so to continue without indexing errors.
262
+ if len(ys) > len(embs):
263
+ print('ys are longer than embs; popping latest rating')
264
+ ys.pop(-1)
265
+
266
+ feature_embs = np.array(torch.stack([embs[i].to('cpu') for i in indices] + [leave_im_emb[0].to('cpu')]).to('cpu'))
267
+ scaler = preprocessing.StandardScaler().fit(feature_embs)
268
+ feature_embs = scaler.transform(feature_embs)
269
+ chosen_y = np.array([ys[i] for i in indices] + [0])
270
+
271
+ print('Gathering coefficients')
272
+ #lin_class = LinearRegression(fit_intercept=False).fit(feature_embs, chosen_y)
273
+ lin_class = SVC(max_iter=50000, kernel='linear', class_weight='balanced', C=1).fit(feature_embs, chosen_y)
274
+ coef_ = torch.tensor(lin_class.coef_, dtype=torch.double)
275
+ coef_ = coef_ / coef_.abs().max() * 3
276
+ print(coef_.shape, 'COEF')
277
+
278
+ plt.close('all')
279
+ plt.hist(np.array(coef_).flatten(), bins=5)
280
+ plt.savefig('plot.jpg')
281
+ print(coef_)
282
+ print('Gathered')
283
+
284
+ rng_prompt = random.choice(prompt_list)
285
+ w = 1# if len(embs) % 2 == 0 else 0
286
+ im_emb = w * coef_.to(dtype=dtype)
287
+
288
+ prompt= 'the scene' if glob_idx % 2 == 0 else rng_prompt
289
+ print(prompt)
290
+ image, im_emb = generate(prompt, im_emb)
291
+ embs += im_emb
292
+
293
+ if len(embs) > 700/16:
294
+ embs = embs[1:]
295
+ ys = ys[1:]
296
+
297
+ return image, embs, ys, calibrate_prompts
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+
307
+ def start(_, embs, ys, calibrate_prompts):
308
+ image, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
309
+ return [
310
+ gr.Button(value='Like (L)', interactive=True),
311
+ gr.Button(value='Neither (Space)', interactive=True),
312
+ gr.Button(value='Dislike (A)', interactive=True),
313
+ gr.Button(value='Start', interactive=False),
314
+ image,
315
+ embs,
316
+ ys,
317
+ calibrate_prompts
318
+ ]
319
+
320
+
321
+ def choose(img, choice, embs, ys, calibrate_prompts):
322
+ if choice == 'Like (L)':
323
+ choice = 1
324
+ elif choice == 'Neither (Space)':
325
+ embs = embs[:-1]
326
+ img, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
327
+ return img, embs, ys, calibrate_prompts
328
+ else:
329
+ choice = 0
330
+
331
+ # if we detected NSFW, leave that area of latent space regardless of how they rated chosen.
332
+ # TODO skip allowing rating
333
+ if img == None:
334
+ print('NSFW -- choice is disliked')
335
+ choice = 0
336
+
337
+ ys += [choice]*1
338
+ img, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
339
+ return img, embs, ys, calibrate_prompts
340
+
341
+ css = '''.gradio-container{max-width: 700px !important}
342
+ #description{text-align: center}
343
+ #description h1, #description h3{display: block}
344
+ #description p{margin-top: 0}
345
+ .fade-in-out {animation: fadeInOut 3s forwards}
346
+ @keyframes fadeInOut {
347
+ 0% {
348
+ background: var(--bg-color);
349
+ }
350
+ 100% {
351
+ background: var(--button-secondary-background-fill);
352
+ }
353
+ }
354
+ '''
355
+ js_head = '''
356
+ <script>
357
+ document.addEventListener('keydown', function(event) {
358
+ if (event.key === 'a' || event.key === 'A') {
359
+ // Trigger click on 'dislike' if 'A' is pressed
360
+ document.getElementById('dislike').click();
361
+ } else if (event.key === ' ' || event.keyCode === 32) {
362
+ // Trigger click on 'neither' if Spacebar is pressed
363
+ document.getElementById('neither').click();
364
+ } else if (event.key === 'l' || event.key === 'L') {
365
+ // Trigger click on 'like' if 'L' is pressed
366
+ document.getElementById('like').click();
367
+ }
368
+ });
369
+ function fadeInOut(button, color) {
370
+ button.style.setProperty('--bg-color', color);
371
+ button.classList.remove('fade-in-out');
372
+ void button.offsetWidth; // This line forces a repaint by accessing a DOM property
373
+
374
+ button.classList.add('fade-in-out');
375
+ button.addEventListener('animationend', () => {
376
+ button.classList.remove('fade-in-out'); // Reset the animation state
377
+ }, {once: true});
378
+ }
379
+ document.body.addEventListener('click', function(event) {
380
+ const target = event.target;
381
+ if (target.id === 'dislike') {
382
+ fadeInOut(target, '#ff1717');
383
+ } else if (target.id === 'like') {
384
+ fadeInOut(target, '#006500');
385
+ } else if (target.id === 'neither') {
386
+ fadeInOut(target, '#cccccc');
387
+ }
388
+ });
389
+
390
+ </script>
391
+ '''
392
+
393
+ with gr.Blocks(css=css, head=js_head) as demo:
394
+ gr.Markdown('''### Blue Tigers: Generative Recommenders for Exploration of Video
395
+ Explore the latent space based on your preferences, without text prompts. Learn more in [the write-up](https://rynmurdock.github.io/posts/2024/3/generative_recomenders/).
396
+ ''', elem_id="description")
397
+ embs = gr.State([])
398
+ ys = gr.State([])
399
+ calibrate_prompts = gr.State([
400
+ 'the moon is melting into my glass of tea',
401
+ 'a sea slug -- pair of claws scuttling -- jelly fish glowing',
402
+ 'an adorable creature. It may be a goblin or a pig or a slug.',
403
+ 'an animation about a gorgeous nebula',
404
+ 'an octopus writhes',
405
+ ])
406
+ def l():
407
+ return None
408
+
409
+ with gr.Row(elem_id='output-image'):
410
+ img = gr.Video(
411
+ label='Lightning',
412
+ autoplay=True,
413
+ interactive=False,
414
+ height=sidel,
415
+ width=sidel,
416
+ include_audio=False,
417
+ elem_id="video_output"
418
+ )
419
+ img.play(l, js='''document.querySelector('[data-testid="Lightning-player"]').loop = true''')
420
+ with gr.Row(equal_height=True):
421
+ b3 = gr.Button(value='Dislike (A)', interactive=False, elem_id="dislike")
422
+ b2 = gr.Button(value='Neither (Space)', interactive=False, elem_id="neither")
423
+ b1 = gr.Button(value='Like (L)', interactive=False, elem_id="like")
424
+ b1.click(
425
+ choose,
426
+ [img, b1, embs, ys, calibrate_prompts],
427
+ [img, embs, ys, calibrate_prompts]
428
+ )
429
+ b2.click(
430
+ choose,
431
+ [img, b2, embs, ys, calibrate_prompts],
432
+ [img, embs, ys, calibrate_prompts]
433
+ )
434
+ b3.click(
435
+ choose,
436
+ [img, b3, embs, ys, calibrate_prompts],
437
+ [img, embs, ys, calibrate_prompts]
438
+ )
439
+ with gr.Row():
440
+ b4 = gr.Button(value='Start')
441
+ b4.click(start,
442
+ [b4, embs, ys, calibrate_prompts],
443
+ [b1, b2, b3, b4, img, embs, ys, calibrate_prompts])
444
+ with gr.Row():
445
+ html = gr.HTML('''<div style='text-align:center; font-size:20px'>You will calibrate for several prompts and then roam.</div><br><br><br>
446
+ <div style='text-align:center; font-size:14px'>Note that while the AnimateDiff-Lightning model with NSFW filtering is unlikely to produce NSFW images, this may still occur, and users should avoid NSFW content when rating.
447
+ </div>
448
+ <br><br>
449
+ <div style='text-align:center; font-size:14px'>Thanks to @multimodalart for their contributions to the demo, esp. the interface and @maxbittker for feedback.
450
+ </div>''')
451
+
452
+ demo.launch(share=True)
safety_checker_improved.py ADDED
@@ -0,0 +1,45 @@
1
+
2
+ # TODO required tensorflow==2.14 for me
3
+ # weights from https://github.com/LAION-AI/safety-pipeline/tree/main
4
+ from PIL import Image
5
+ import tensorflow_hub as hub
6
+ import tensorflow
7
+ import numpy as np
8
+ import sys
9
+ sys.path.append('/home/ryn_mote/Misc/generative_recommender/gradio_video/automl/efficientnetv2/')
10
+ import tensorflow as tf
11
+ from tensorflow.keras import mixed_precision
12
+ physical_devices = tf.config.list_physical_devices('GPU')
13
+
14
+ tf.config.experimental.set_memory_growth(
15
+ physical_devices[0], True
16
+ )
17
+
18
+ model = tf.keras.models.load_model('nsfweffnetv2-b02-3epochs.h5',custom_objects={"KerasLayer":hub.KerasLayer})
19
+ # "The image classifier had been trained on 682550 images from the 5 classes "Drawing" (39026), "Hentai" (28134), "Neutral" (369507), "Porn" (207969) & "Sexy" (37914).
20
+ # ... we created a manually inspected test set that consists of 4900 samples, that contains images & their captions."
21
+
22
+ # Run prediction
23
+ def maybe_nsfw(pil_image):
24
+ # Run prediction
25
+ imm = tensorflow.image.resize(np.array(pil_image)[:, :, :3], (260, 260))
26
+ imm = (imm / 255)
27
+ pred = model(tensorflow.expand_dims(imm, 0)).numpy()
28
+ probs = tensorflow.math.softmax(pred[0]).numpy()
29
+ print(probs)
30
+ if all([i < .3 for i in probs[[1, 3, 4]]]):
31
+ return False
32
+ return True
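The indices `[1, 3, 4]` above select the NSFW classes, assuming the model's output order follows the class listing quoted earlier (Drawing, Hentai, Neutral, Porn, Sexy), which the code implies but does not state; spelled out:

CLASS_NAMES = ['drawing', 'hentai', 'neutral', 'porn', 'sexy']  # assumed output order
NSFW_CLASSES = [CLASS_NAMES.index(c) for c in ('hentai', 'porn', 'sexy')]  # -> [1, 3, 4]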
33
+
34
+ # pre-initializing prediction
35
+ maybe_nsfw(Image.new("RGB", (260, 260), 255))
36
+ model.load_weights('nsfweffnetv2-b02-3epochs.h5', by_name=True, )
37
+
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
+
twitter_prompts.csv ADDED
@@ -0,0 +1,72 @@
1
+ ,0
2
+ 0,a sunset
3
+ 1,a still life in blue
4
+ 2,last day on earth
5
+ 3,the conch shell
6
+ 4,the winds of change
7
+ 5,a surrealist eye
8
+ 6,a surrealist polaroid photo of an apple
9
+ 7,metaphysics
10
+ 8,the sun is setting into my glass of tea
11
+ 9,the moon at 3am
12
+ 10,a memento mori
13
+ 11,quaking aspen tree
14
+ 12,violets and daffodils
15
+ 13,espresso
16
+ 14,sisyphus
17
+ 15,high windows of stained glass
18
+ 16,a green dog
19
+ 17,an adorable companion; it is a pig
20
+ 18,bird of paradise
21
+ 19,a complex intricate machine
22
+ 20,a white clock
23
+ 21,a film featuring the landscape Salt Lake City Utah
24
+ 22,a creature
25
+ 23,a house set aflame.
26
+ 24,a gorgeous landscape by Cy Twombly
27
+ 25,smoke rises from the caterpillar's hookah
28
+ 26,corvid in red
29
+ 27,Monet's pond
30
+ 28,Genesis
31
+ 29,Death is a black camel that kneels down so we can ride
32
+ 30,a cherry tree made of fractals
33
+ 29,the end of the sidewalk
34
+ 30,a polaroid photo of a bustling city of lights and sky scrapers
35
+ 31,The Fig Tree metaphor
36
+ 32,God killed Van Gogh.
37
+ 33,a cosmic entity alien with four eyes.
38
+ 34,a horse with 128 eyes.
39
+ 35,a being with an infinite set of eyes (it is omniscient)
40
+ 36,A sticky-note magnum opus featuring birds
41
+ 37,Moka Pot
42
+ 38,the moon is a sickle cell
43
+ 39,The Penultimate Supper
44
+ 40,Art
45
+ 41,surrealism
46
+ 42,a god made of wires & dust
47
+ 43,a dandelion blown into the universe
48
+
49
+
50
+
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+