# image-label-3
#
# Sleeping
#
# File size: 7,463 Bytes
#
# d39fc00

from __future__ import annotations

import functools
import io
import urllib
from typing import Tuple, List, Any

import huggingface_hub
import onnxruntime as rt
import pandas as pd
import numpy as np
import PIL.Image
import requests

import dbimutils
import piexif
import piexif.helper
from urllib.request import urlopen

import model

# Access token handed to huggingface_hub.hf_hub_download as use_auth_token;
# left empty here.
HF_TOKEN = ""
# Hugging Face repo ids of the tagger models selectable through change_model().
SWIN_MODEL_REPO = "SmilingWolf/wd-v1-4-swinv2-tagger-v2"
CONV_MODEL_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
CONV2_MODEL_REPO = "SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
VIT_MODEL_REPO = "SmilingWolf/wd-v1-4-vit-tagger-v2"
# File names downloaded from a repo: the ONNX model and the CSV of tags
# (load_labels reads its "name" and "category" columns).
MODEL_FILENAME = "model.onnx"
LABEL_FILENAME = "selected_tags.csv"


def change_model(model_name):
    """Load the tagger called *model_name* and store it in ``loaded_models``.

    The module-level ``loaded_models`` dict must already exist; this function
    only writes the freshly loaded session into it under *model_name*.

    Args:
        model_name: One of ``"SwinV2"``, ``"ConvNext"``, ``"ConvNextV2"`` or
            ``"ViT"``.

    Returns:
        The session returned by ``load_model`` for the matching repository.

    Raises:
        ValueError: If *model_name* is not one of the supported names.
    """
    global loaded_models

    if model_name == "SwinV2":
        repo = SWIN_MODEL_REPO
    elif model_name == "ConvNext":
        repo = CONV_MODEL_REPO
    elif model_name == "ConvNextV2":
        repo = CONV2_MODEL_REPO
    elif model_name == "ViT":
        repo = VIT_MODEL_REPO
    else:
        # Without this branch an unknown name fell through the chain and
        # crashed later with an UnboundLocalError on the model variable.
        raise ValueError(
            f"Unknown model name {model_name!r}; expected one of "
            "'SwinV2', 'ConvNext', 'ConvNextV2', 'ViT'"
        )

    session = load_model(repo, MODEL_FILENAME)
    loaded_models[model_name] = session
    return session


def load_model(model_repo: str, model_filename: str) -> rt.InferenceSession:
    """Download *model_filename* from *model_repo* and open it with ONNX Runtime.

    Args:
        model_repo: Hugging Face repository id to download from.
        model_filename: Name of the model file inside that repository.

    Returns:
        An ``rt.InferenceSession`` created from the downloaded file's path.
    """
    # hf_hub_download returns the local path of the fetched file.
    local_file = huggingface_hub.hf_hub_download(
        model_repo,
        model_filename,
        use_auth_token=HF_TOKEN,
    )
    return rt.InferenceSession(local_file)


def load_labels() -> tuple[list[Any], list[Any], list[Any], list[Any]]:
    """Download the tag CSV and split its rows by category.

    The CSV is fetched from ``CONV2_MODEL_REPO`` and must provide the columns
    ``name`` and ``category``.

    Returns:
        A 4-tuple ``(tag_names, rating_indexes, general_indexes,
        character_indexes)``: every value of the ``name`` column, followed by
        the row positions whose ``category`` equals 9, 0 and 4 respectively.
    """
    csv_path = huggingface_hub.hf_hub_download(
        CONV2_MODEL_REPO, LABEL_FILENAME, use_auth_token=HF_TOKEN
    )
    tag_table = pd.read_csv(csv_path)
    names = tag_table["name"].tolist()
    categories = tag_table["category"]

    def rows_with(category_code):
        # Positions of the rows whose "category" column equals category_code.
        return list(np.where(categories == category_code)[0])

    return names, rows_with(9), rows_with(0), rows_with(4)


def _flatten_to_bgr_batch(image, size):
    """Composite *image* onto white and return it as a float32 batch of one."""
    # Alpha to white: paste the RGBA image onto a white canvas using the image
    # itself as the paste mask, then drop the alpha channel.
    rgba = image.convert("RGBA")
    canvas = PIL.Image.new("RGBA", rgba.size, "WHITE")
    canvas.paste(rgba, mask=rgba)
    pixels = np.asarray(canvas.convert("RGB"))

    # PIL RGB to OpenCV BGR: reverse the channel axis.
    pixels = pixels[:, :, ::-1]

    # NOTE(review): presumably pads to a square and resizes to size x size --
    # confirm against dbimutils.
    pixels = dbimutils.make_square(pixels, size)
    pixels = dbimutils.smart_resize(pixels, size)
    return np.expand_dims(pixels.astype(np.float32), 0)


def _read_image_info(image):
    """Return a cleaned copy of ``image.info`` and the generation-info text."""
    # Work on a copy so the caller's image metadata is never mutated.
    items = dict(image.info)
    geninfo = ""

    if "exif" in items:
        exif = piexif.load(items["exif"])
        exif_comment = (exif or {}).get("Exif", {}).get(piexif.ExifIFD.UserComment, b"")
        try:
            exif_comment = piexif.helper.UserComment.load(exif_comment)
        except ValueError:
            # Not a well-formed UserComment: fall back to a lenient decode.
            exif_comment = exif_comment.decode("utf8", errors="ignore")

        items["exif comment"] = exif_comment
        geninfo = exif_comment

        for field in (
            "jfif",
            "jfif_version",
            "jfif_unit",
            "jfif_density",
            "dpi",
            "exif",
            "loop",
            "background",
            "timestamp",
            "duration",
        ):
            items.pop(field, None)

    # A "parameters" entry takes precedence over the EXIF comment.
    geninfo = items.get("parameters", geninfo)
    return items, geninfo


def predict(
        image: PIL.Image.Image,
        model_name: str,
        general_threshold: float,
        character_threshold: float,
        tag_names: list[str],
        rating_indexes: list[np.int64],
        general_indexes: list[np.int64],
        character_indexes: list[np.int64],
):
    """Tag *image* with the model stored under *model_name*.

    The session is taken from the module-level ``loaded_models`` dict and is
    loaded through ``change_model`` when the stored entry is ``None``. The
    image metadata and all intermediate results are printed to stdout.

    Args:
        image: A PIL image, or a string that is handed to
            ``dbimutils.read_img_from_url``.
        model_name: Key into ``loaded_models``.
        general_threshold: General tags must score strictly above this value.
        character_threshold: Character tags must score strictly above this
            value.
        tag_names: Tag name for every model output position.
        rating_indexes: Output positions of the rating tags.
        general_indexes: Output positions of the general tags.
        character_indexes: Output positions of the character tags.

    Returns:
        A dict with the keys:

        * ``'a'``: general tags above ``general_threshold``, ordered by
          descending score and joined with ``", "``, with underscores replaced
          by spaces and parentheses backslash-escaped.
        * ``'c'``: the same joined tags without any substitution.
        * ``'rating'``: mapping of every rating tag to its score.
        * ``'character_res'`` / ``'general_res'``: lists of
          ``{'tag': ..., 'confidence': ...}`` dicts for tags that passed their
          threshold and additionally score above 0.4.

    Raises:
        TypeError: If *image* is neither a string nor a PIL image.
        KeyError: If *model_name* is not a key of ``loaded_models``.
    """
    if isinstance(image, str):
        rawimage = dbimutils.read_img_from_url(image)
    elif isinstance(image, PIL.Image.Image):
        rawimage = image
    else:
        raise TypeError("Invalid image type")

    model = loaded_models[model_name]
    if model is None:
        model = change_model(model_name)

    model_input = model.get_inputs()[0]
    # The input shape must unpack into four entries; the second is used as
    # the target edge length for the image.
    _, height, _, _ = model_input.shape

    batch = _flatten_to_bgr_batch(rawimage, height)

    output_name = model.get_outputs()[0].name
    probs = model.run([output_name], {model_input.name: batch})[0]

    labels = list(zip(tag_names, probs[0].astype(float)))

    # Ratings: every rating tag is reported with its score (no argmax is taken).
    rating = dict(labels[i] for i in rating_indexes)

    # General tags: keep any whose confidence is above the threshold.
    general_res = {
        tag: score
        for tag, score in (labels[i] for i in general_indexes)
        if score > general_threshold
    }

    # Character tags: keep any whose confidence is above the threshold.
    character_res = {
        tag: score
        for tag, score in (labels[i] for i in character_indexes)
        if score > character_threshold
    }

    ranked_tags = sorted(general_res, key=general_res.get, reverse=True)
    c = ", ".join(ranked_tags)
    # Escaped backslashes: "\(" is an invalid escape sequence in a normal
    # string literal, while "\\(" yields the same two characters legally.
    a = c.replace("_", " ").replace("(", "\\(").replace(")", "\\)")

    items, geninfo = _read_image_info(rawimage)

    for key, text in items.items():
        print(key)
        print(text)

    print("geninfo", geninfo)
    print("a", a)
    print("c", c)
    print("rating", rating)
    print("character_res", character_res)
    print("general_res", general_res)

    # The returned lists get a fixed extra cut-off on top of the caller's
    # thresholds; 'a' and 'c' above are built before it and are not affected.
    min_confidence = 0.4
    character_res = [
        {'tag': tag, 'confidence': score}
        for tag, score in character_res.items()
        if score > min_confidence
    ]
    general_res = [
        {'tag': tag, 'confidence': score}
        for tag, score in general_res.items()
        if score > min_confidence
    ]

    return {'a': a, 'c': c, 'rating': rating, 'character_res': character_res, 'general_res': general_res}


def label_img(
        image: PIL.Image.Image | str,
        model: str,
        # model: (["SwinV2", "ConvNext", "ConvNextV2", "ViT"], value="ConvNextV2", label="Model"),
        l_score_general_threshold: float,
        l_score_character_threshold: float,
):
    """Tag *image* with the requested model and return ``predict``'s result.

    Args:
        image: A PIL image, or a string; strings starting with ``"http"`` are
            fetched through ``dbimutils.read_img_from_url``.
        model: Name of the tagger to use (``"SwinV2"``, ``"ConvNext"``,
            ``"ConvNextV2"`` or ``"ViT"``). Inside this function the parameter
            shadows the imported ``model`` module.
        l_score_general_threshold: Threshold forwarded to ``predict`` for
            general tags.
        l_score_character_threshold: Threshold forwarded to ``predict`` for
            character tags.

    Returns:
        Whatever ``predict`` returns for the image.
    """
    if isinstance(image, str) and image.startswith("http"):
        image = dbimutils.read_img_from_url(image)

    # Create the session cache only once. Resetting it on every call threw
    # away already loaded sessions and forced a reload each time.
    globals().setdefault(
        "loaded_models",
        {"SwinV2": None, "ConvNext": None, "ConvNextV2": None, "ViT": None},
    )

    # No eager change_model("ConvNextV2") here: predict() loads the requested
    # model on demand, so preloading a fixed one only cost an extra load
    # whenever a different model was asked for.
    tag_names, rating_indexes, general_indexes, character_indexes = load_labels()

    return predict(
        image=image,
        model_name=model,
        general_threshold=l_score_general_threshold,
        character_threshold=l_score_character_threshold,
        tag_names=tag_names,
        rating_indexes=rating_indexes,
        general_indexes=general_indexes,
        character_indexes=character_indexes,
    )


def write_image_tag(img_id: int, is_valid: bool, tags: List[model.ImageTag], callback_url: str):
    """Build an ``ImageScanCallbackRequest`` from the given scan result.

    The request object is constructed and then discarded: nothing is returned
    and *callback_url* is not used.

    NOTE(review): presumably the request was meant to be delivered to
    callback_url -- confirm with the author.
    """
    request_fields = {"img_id": img_id, "is_valid": is_valid, "tags": tags}
    model.ImageScanCallbackRequest(**request_fields)


if __name__ == "__main__":
    # Manual smoke run: tag one hosted sample image with the SwinV2 model and
    # print the resulting dict.
    score_slider_step = 0.05
    score_general_threshold = 0.35
    score_character_threshold = 0.85

    ret = label_img(
        'https://pub-9747017e9ec54620bfbe2385f14fe4d7.r2.dev/'
        'cnGirlYcy_v10_people_network_nannansleep/'
        'cnGirlYcy_v10_people_network_nannansleep_r_1679670778_0.png',
        "SwinV2",
        score_general_threshold,
        score_character_threshold,
    )
    print(ret)