import gradio as gr
import yolov5
import numpy as np
from PIL import Image, ImageDraw, ImageFilter
from transformers import OneFormerProcessor, OneFormerForUniversalSegmentation
import torchvision.transforms
import torch

# OneFormer (Cityscapes) for semantic segmentation of people
person_processor = OneFormerProcessor.from_pretrained("shi-labs/oneformer_cityscapes_swin_large")
person_model = OneFormerForUniversalSegmentation.from_pretrained("shi-labs/oneformer_cityscapes_swin_large")
transform = torchvision.transforms.ToPILImage()

# YOLOv5 model fine-tuned for license plate detection
lp_model = yolov5.load('keremberke/yolov5m-license-plate')
lp_model.conf = 0.25          # NMS confidence threshold
lp_model.iou = 0.45           # NMS IoU threshold
lp_model.agnostic = False     # NMS class-agnostic
lp_model.multi_label = False  # NMS multiple labels per box
lp_model.max_det = 1000       # maximum number of detections per image


def detect_person(image: Image.Image):
    # Run semantic segmentation and build a mask that is 0 on person pixels, 255 elsewhere
    semantic_inputs = person_processor(images=image, task_inputs=["semantic"], return_tensors="pt")
    semantic_outputs = person_model(**semantic_inputs)
    predicted_semantic_map = person_processor.post_process_semantic_segmentation(
        semantic_outputs, target_sizes=[image.size[::-1]])[0]
    mask = transform(predicted_semantic_map.to(torch.uint8))
    # Cityscapes class 11 is "person"
    mask = Image.eval(mask, lambda x: 0 if x == 11 else 255)
    return mask


def detect_license_plate(image: Image.Image):
    # Detect license plates and black out their bounding boxes in the mask
    results = lp_model(image, size=image.size[0])
    predictions = results.pred[0]
    boxes = predictions[:, :4]  # x1, y1, x2, y2
    mask = Image.new(mode="L", size=image.size, color=255)
    draw = ImageDraw.Draw(mask)
    for box in boxes:
        draw.rectangle(box.tolist(), fill=0)
    return mask


def detect_dummy(image: Image.Image):
    # Fallback for unknown detector names: an all-white mask (nothing anonymized)
    return Image.new(mode="L", size=image.size, color=255)


implemented_detectors = {
    "Person": detect_person,
    "License Plate": detect_license_plate
}


def anonymize(path: str, detectors: list):
    # Read image
    image = Image.open(path)
    # Run requested detectors
    masks = [implemented_detectors.get(det, detect_dummy)(image) for det in detectors]
    # Combine masks (pixel-wise minimum: 0 wherever any detector flagged the pixel)
    combined = np.minimum.reduce([np.array(m) for m in masks])
    mask = Image.fromarray(combined)
    # Apply blur through mask
    blurred = image.filter(ImageFilter.GaussianBlur(15))
    anonymized = Image.composite(image, blurred, mask)
    return anonymized


def test_gradio(image):
    # Same pipeline as anonymize(), but always runs both detectors on a PIL image
    masks = [detect_person(image), detect_license_plate(image)]
    combined = np.minimum.reduce([np.array(m) for m in masks])
    mask = Image.fromarray(combined)
    # Apply blur through mask
    blurred = image.filter(ImageFilter.GaussianBlur(15))
    anonymized = Image.composite(image, blurred, mask)
    return anonymized


demo = gr.Interface(fn=test_gradio, inputs=gr.Image(type="pil"), outputs=gr.Image(type="pil"))
demo.launch(share=True)
#demo.launch(server_name="localhost", server_port=8080)