import gradio as gr
import torch
from torchvision import transforms
from PIL import Image
import numpy as np
import os
from u2net import U2NET
import data_transforms
import torch.nn.functional as F
from skimage import io
from torchvision.transforms.functional import normalize

# Load the model
def _load_model(weights_path):
    """Build a U2NET(3, 1), load its weights onto the CPU and switch to eval mode."""
    net = U2NET(3, 1)
    # map_location="cpu" lets the checkpoint be read regardless of the device
    # it was saved from.
    net.load_state_dict(torch.load(weights_path, map_location="cpu"))
    net.eval()
    return net


# Checkpoint file name, resolved relative to the current working directory.
model_path = "u2net.pth"
# Module-level network instance used by generate_mask(); created at import time.
model = _load_model(model_path)

# Preprocess the image
def preprocess(image: np.ndarray):
    """Wrap an image array in a sample dict and run the rescale/to-tensor transforms.

    The transforms are called on a dict with "imidx", "image" and "label"
    entries, so a zero-filled placeholder label matching the image's first
    two dimensions is created alongside the image.

    Args:
        image: Array of shape (H, W, C) or (H, W). A 2-D image gets a
            trailing channel axis added before the transforms run.

    Returns:
        Whatever the composed ``RescaleT(320)`` + ``ToTensorLab(flag=0)``
        transform returns for the sample.
    """
    rank = len(image.shape)

    # Allocate only a single H x W plane for the dummy label instead of a
    # full zero copy of the image that would be sliced down to one channel.
    label = np.zeros(image.shape[0:2])

    if rank == 3:
        label = label[:, :, np.newaxis]
    elif rank == 2:
        # Give both arrays an explicit channel axis.
        image = image[:, :, np.newaxis]
        label = label[:, :, np.newaxis]

    transform = transforms.Compose([data_transforms.RescaleT(320), data_transforms.ToTensorLab(flag=0)])
    return transform({"imidx": np.array([0]), "image": image, "label": label})

# Generate the mask
def generate_mask(image) -> np.ndarray:
    """Run the module-level model on a PIL image and return a grayscale mask.

    Args:
        image: PIL image in any mode; it is converted to RGB before inference.

    Returns:
        A 2-D ``uint8`` array with the same height and width as ``image``.
        The prediction is min-max stretched to the 0..255 range. If the
        prediction is constant (nothing to stretch) the mask is all zeros.
    """
    rgb = np.array(image.convert("RGB"))
    im_shp = rgb.shape[0:2]
    input_size = [1024, 1024]

    # NOTE(review): preprocess() in this file builds a different input recipe
    # (RescaleT(320) + ToTensorLab) that is not used here -- confirm which
    # recipe the u2net.pth checkpoint expects.
    im_tensor = torch.tensor(rgb, dtype=torch.float32).permute(2, 0, 1)
    im_tensor = F.interpolate(torch.unsqueeze(im_tensor, 0), input_size, mode="bilinear").type(torch.uint8)
    net_input = torch.divide(im_tensor, 255.0)
    net_input = normalize(net_input, [0.5, 0.5, 0.5], [1.0, 1.0, 1.0])

    with torch.no_grad():
        outputs = model(net_input)

        # Take the first output map. Accept both a flat sequence of tensors
        # and a nested one (a sequence whose first item is itself a sequence)
        # -- presumably the local U2NET returns the former; verify in u2net.py.
        pred = outputs[0]
        if isinstance(pred, (list, tuple)):
            pred = pred[0]
        # Bilinear interpolation needs a 4-D (N, C, H, W) input.
        while pred.dim() < 4:
            pred = pred.unsqueeze(0)

        # Back to the original resolution, then drop the batch axis.
        pred = torch.squeeze(F.interpolate(pred, im_shp, mode="bilinear"), 0)

        lowest = torch.min(pred)
        span = torch.max(pred) - lowest
        if span > 0:
            pred = (pred - lowest) / span
        else:
            # A flat prediction would divide by zero and yield NaNs.
            pred = torch.zeros_like(pred)
        pred = pred.numpy()

    # pred is (C, H, W) in [0, 1]; keep the first channel and scale to 8 bits.
    output_mask = (pred[0] * 255).astype(np.uint8)

    return output_mask

# Define the final predict method to overlay the mask
def predict(image):
    """Return ``image`` with its background made transparent.

    Args:
        image: PIL image to process, or ``None`` when no image was supplied
            (e.g. the form was submitted without an upload).

    Returns:
        An RGBA PIL image of the same size, composited through the predicted
        mask onto a fully transparent canvas, or ``None`` when ``image`` is
        ``None``.
    """
    if image is None:
        # Nothing to process; returning None avoids an AttributeError on
        # image.convert() further down.
        return None

    # Generate the mask
    mask = generate_mask(image)

    # Convert the image to RGBA (to support transparency)
    image = image.convert("RGBA")

    # The mask is a soft grayscale matte (not binarised): 255 keeps a pixel,
    # 0 makes it fully transparent, values in between blend. Resizing and
    # converting to "L" guarantees it matches the image for paste().
    mask = Image.fromarray(mask).resize(image.size).convert("L")

    # Start from a fully transparent canvas of the same size.
    transparent_image = Image.new("RGBA", image.size)

    # Paste the original image through the mask.
    transparent_image.paste(image, mask=mask)

    return transparent_image

# Gradio UI: one PIL image in, one PIL image out, both routed through predict().
_input_image = gr.Image(type="pil")
# NOTE(review): `tool="editor"` is only accepted by some Gradio releases --
# confirm it against the version this app is pinned to.
_output_image = gr.Image(type="pil", tool="editor", label="Edited Image")

iface = gr.Interface(
    fn=predict,
    inputs=_input_image,
    outputs=_output_image,
    title="Background Removal with U2NET",
    description="Upload an image and remove the background",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()