# Hugging Face Space: Pavankunchala/Depth-Estimation-App
# Space status shown on the page: Runtime error
# File size: 6,020 Bytes
# Revision: b534665

import sys
import time
from pathlib import Path

import cv2
from openvino.inference_engine import IECore
import matplotlib.cm
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
from PIL import Image
import tempfile


# Image file opened in image mode when the user has not uploaded one.
DEMO_IMAGE = 'dog-new.jpg'

# Video file opened in video mode when there is no upload and the webcam
# button was not pressed.
DEMO_VIDEO = 'demo.mp4'


@st.cache
def normalize_minmax(data):
    """Linearly rescale ``data`` so its minimum maps to 0 and its maximum to 1.

    Args:
        data: Non-empty NumPy array of numeric values.

    Returns:
        Array with the same shape as ``data``. When every element of
        ``data`` is equal there is no range to stretch, so an all-zero
        float array is returned instead of the NaNs a 0/0 division would
        produce.

    Raises:
        ValueError: If ``data`` is empty (raised by ``data.min()``).
    """
    low = data.min()
    span = data.max() - low
    if span == 0:
        # Constant input: avoid dividing by zero and propagating NaN into the
        # colormap lookup that consumes this result.
        return np.zeros_like(data, dtype=np.float64)
    return (data - low) / span

@st.cache
def convert_result_to_image(result, colormap="inferno"):
    """Turn a network output array into a colour-mapped ``uint8`` image.

    The leading axis of ``result`` is removed with ``squeeze(0)``, the values
    are min-max normalised, looked up in the named Matplotlib colormap, and
    only the first three colour channels of the colormap output are kept
    (the alpha channel is dropped) before scaling to 0-255.

    Args:
        result: Array whose first axis has length 1; the remaining axes are
            indexed as a 2-D image (presumably a (1, H, W) disparity map --
            confirm against the model output).
        colormap: Name of a registered Matplotlib colormap.

    Returns:
        ``np.uint8`` array of shape (H, W, 3).

    Raises:
        ValueError: If the first axis of ``result`` is not of length 1, or
            if ``colormap`` is not a known colormap name.
    """
    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
    # 3.9; pyplot.get_cmap performs the same lookup and exists in both old
    # and new releases.
    cmap = plt.get_cmap(colormap)
    result = result.squeeze(0)
    result = normalize_minmax(result)
    # The colormap returns RGBA floats in [0, 1]; keep RGB and scale to bytes.
    result = cmap(result)[:, :, :3] * 255
    result = result.astype(np.uint8)
    return result

@st.cache
def to_rgb(image_data) -> np.ndarray:
    """Return ``image_data`` after OpenCV's BGR-to-RGB colour conversion.

    Args:
        image_data: Image array accepted by ``cv2.cvtColor``.

    Returns:
        The converted image array.
    """
    conversion_code = cv2.COLOR_BGR2RGB
    converted = cv2.cvtColor(image_data, conversion_code)
    return converted


# Page title plus the sidebar heading under which the mode selector and
# per-mode widgets are placed.
st.title("Depth Estimation App")
st.sidebar.title('Depth Estimation')
st.sidebar.subheader('Parameters')

# Device name handed to IECore.load_network below.
DEVICE = "CPU"
# Model description file; the weights are read from the same path with a
# ".bin" suffix.
MODEL_FILE = "models/MiDaS_small.xml"

model_xml_path = Path(MODEL_FILE)


# Read the network and load it onto DEVICE.
# NOTE(review): this runs at module level, so it executes each time
# Streamlit runs the script -- consider caching the loaded network.
ie = IECore()
net = ie.read_network(model=model_xml_path, weights=model_xml_path.with_suffix(".bin"))
exec_net = ie.load_network(network=net, device_name=DEVICE)

# Names of the first input and first output; both modes use them to feed the
# network and to pick the result out of the infer() return value.
input_key = list(exec_net.input_info)[0]
output_key = list(exec_net.outputs.keys())[0]

# Everything from index 2 onward in the input dims is unpacked as
# (height, width) -- assumes a 4-D NCHW input, as the resize comments in the
# mode branches state.
network_input_shape = exec_net.input_info[input_key].tensor_desc.dims
network_image_height, network_image_width = network_input_shape[2:]


# Sidebar selector that decides which branch below runs; the first option
# ('Run on Image') is preselected.
app_mode = st.sidebar.selectbox('Choose the App mode',
['Run on Image','Run on Video'],index = 0)


if app_mode == "Run on Image":
    # Image mode: take an uploaded picture (or the demo image), run one
    # inference on it and show the colour-mapped result at the picture's
    # original size.

    st.markdown('Running on Image')

    st.sidebar.text('Params for Image')
    # Inject CSS that fixes the sidebar width at 400px in both its expanded
    # and collapsed states.
    st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
        width: 400px;
    }
    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
        width: 400px;
        margin-left: -400px;
    }
    </style>
    """,
    unsafe_allow_html=True,
    )

    img_file_buffer = st.sidebar.file_uploader("Upload an image", type=[ "jpg", "jpeg",'png'])

    # Use the upload when there is one, otherwise the bundled demo image.
    image_source = img_file_buffer if img_file_buffer is not None else DEMO_IMAGE
    # Force three channels: PNG uploads can be RGBA, palette or greyscale,
    # which would break the HWC -> CHW transpose / network input below.
    image = np.array(Image.open(image_source).convert("RGB"))

    st.sidebar.text('Original Image')
    st.sidebar.image(image)
    # cv2.resize takes dsize as (width, height), not (height, width).
    resized_image = cv2.resize(src=image, dsize=(network_image_width, network_image_height))
    # reshape image to network input shape NCHW
    input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0)

    result = exec_net.infer(inputs={input_key: input_image})[output_key]
    # convert network result of disparity map to an image that shows
    # distance as colors
    result_image = convert_result_to_image(result=result)
    # resize back to original image shape. cv2.resize expects shape
    # in (width, height), [::-1] reverses the (height, width) shape to match this.
    result_image = cv2.resize(result_image, image.shape[:2][::-1])

    st.subheader('Output Image')

    st.image(result_image,use_column_width= True)

if app_mode =='Run on Video':
    # Video mode: read frames from an uploaded file, the webcam or the demo
    # video, run inference on each frame, and display the frame stacked
    # vertically on top of its colour-mapped result together with the
    # achieved frame rate. Optionally the stacked frames are written to
    # 'output_depth.mp4'.

    st.markdown('Running on Video')

    use_webcam = st.sidebar.button('Use Webcam')

    video_file_buffer = st.sidebar.file_uploader("Upload a video", type=[ "mp4", "mov",'avi','asf', 'm4v' ])

    stop_button = st.sidebar.button('Stop Processing')

    if stop_button:
        st.stop()

    # Choose the frame source: upload first, then webcam, then demo video.
    uploaded_video_path = None
    if video_file_buffer:
        # cv2.VideoCapture opens files by path, so the upload is copied to a
        # temporary file. Leaving the ``with`` block closes (and therefore
        # flushes) the file before OpenCV reads it; previously the data could
        # still be sitting in the write buffer.
        with tempfile.NamedTemporaryFile(delete=False) as tfflie:
            tfflie.write(video_file_buffer.read())
        uploaded_video_path = tfflie.name
        vid = cv2.VideoCapture(uploaded_video_path)
    elif use_webcam:
        vid = cv2.VideoCapture(0)
    else:
        vid = cv2.VideoCapture(DEMO_VIDEO)

    # Frame rate used for the saved file. CAP_PROP_FPS may be reported as 0
    # (int(0.0) is falsy), in which case a fixed fallback is used so the
    # writer still gets a usable rate.
    DEFAULT_OUTPUT_FPS = 30
    source_fps = int(vid.get(cv2.CAP_PROP_FPS)) or DEFAULT_OUTPUT_FPS
    # 'mp4v' matches the .mp4 container of the output file name.
    codec = cv2.VideoWriter_fourcc('m','p','4','v')
    # The writer is created lazily from the first stacked frame so that its
    # frame size always equals the size of the frames written to it (the
    # stacked frame is twice as tall as a source frame).
    out = None

    stframe = st.empty()
    SCALE_OUTPUT = 1
    st.markdown("**Frame Rate**")
    kpi1_text = st.markdown("0")
    save_video = st.checkbox('Save video')

    try:
        if not vid.isOpened():
            # Without this the loop below is skipped and success is reported
            # although nothing was processed.
            st.error('Could not open the video source')
            st.stop()

        while vid.isOpened():
            ret, image = vid.read()
            # Check the read result before touching the frame: at end of
            # stream ``image`` is None and has no ``shape``.
            if not ret:
                break

            frame_start = time.perf_counter()
            input_video_frame_height, input_video_frame_width = image.shape[:2]
            target_frame_height = int(input_video_frame_height * SCALE_OUTPUT)
            target_frame_width = int(input_video_frame_width * SCALE_OUTPUT)

            # cv2.resize takes dsize as (width, height), not (height, width).
            resized_image = cv2.resize(src=image, dsize=(network_image_width, network_image_height))
            # reshape image to network input shape NCHW
            input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0)

            result = exec_net.infer(inputs={input_key: input_image})[output_key]

            # The colour-mapped result is RGB; swapping R and B makes it
            # match the BGR frames OpenCV delivers.
            result_frame = to_rgb(convert_result_to_image(result))
            # Resize image and result to target frame shape
            result_frame = cv2.resize(result_frame, (target_frame_width, target_frame_height))
            image = cv2.resize(image, (target_frame_width, target_frame_height))
            # Stack the source frame on top of its result
            stacked_frame = np.vstack((image, result_frame))
            if save_video:
                if out is None:
                    stacked_height, stacked_width = stacked_frame.shape[:2]
                    out = cv2.VideoWriter(
                        'output_depth.mp4', codec, source_fps,
                        (stacked_width, stacked_height),
                    )
                out.write(stacked_frame)

            stframe.image(stacked_frame,channels = 'BGR',use_column_width=True)
            elapsed = time.perf_counter() - frame_start
            display_fps = 1.0 / elapsed if elapsed > 0 else 0.0
            kpi1_text.write(f"<h1 style='text-align: center; color: red;'>{display_fps:.1f}</h1>", unsafe_allow_html=True)
    finally:
        # Runs on normal completion and when Streamlit interrupts the script,
        # so the capture, writer and temporary upload copy are always freed.
        vid.release()
        if out is not None:
            out.release()
        if uploaded_video_path is not None:
            try:
                Path(uploaded_video_path).unlink()
            except OSError:
                # Best-effort cleanup: a temp file that is already gone or
                # still locked must not mask the real outcome of the run.
                pass

    st.success('Video is Processed')
    st.stop()