import sys import time from pathlib import Path import cv2 from openvino.inference_engine import IECore import matplotlib.cm import matplotlib.pyplot as plt import numpy as np import streamlit as st from PIL import Image import tempfile DEMO_IMAGE = 'dog-new.jpg' DEMO_VIDEO = 'demo.mp4' @st.cache def normalize_minmax(data): return (data - data.min()) / (data.max() - data.min()) @st.cache def convert_result_to_image(result, colormap="inferno"): cmap = matplotlib.cm.get_cmap(colormap) result = result.squeeze(0) result = normalize_minmax(result) result = cmap(result)[:, :, :3] * 255 result = result.astype(np.uint8) return result @st.cache def to_rgb(image_data) -> np.ndarray: return cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB) st.title("Depth Estimation App") st.sidebar.title('Depth Estimation') st.sidebar.subheader('Parameters') DEVICE = "CPU" MODEL_FILE = "models/MiDaS_small.xml" model_xml_path = Path(MODEL_FILE) ie = IECore() net = ie.read_network(model=model_xml_path, weights=model_xml_path.with_suffix(".bin")) exec_net = ie.load_network(network=net, device_name=DEVICE) input_key = list(exec_net.input_info)[0] output_key = list(exec_net.outputs.keys())[0] network_input_shape = exec_net.input_info[input_key].tensor_desc.dims network_image_height, network_image_width = network_input_shape[2:] app_mode = st.sidebar.selectbox('Choose the App mode', ['Run on Image','Run on Video'],index = 0) if app_mode == "Run on Image": st.markdown('Running on Image') st.sidebar.text('Params for Image') st.markdown( """ """, unsafe_allow_html=True, ) img_file_buffer = st.sidebar.file_uploader("Upload an image", type=[ "jpg", "jpeg",'png']) if img_file_buffer is not None: image = np.array(Image.open(img_file_buffer)) else: demo_image = DEMO_IMAGE image = np.array(Image.open(demo_image)) st.sidebar.text('Original Image') st.sidebar.image(image) resized_image = cv2.resize(src=image, dsize=(network_image_height, network_image_width)) # reshape image to network input shape NCHW input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0) result = exec_net.infer(inputs={input_key: input_image})[output_key] # convert network result of disparity map to an image that shows # distance as colors result_image = convert_result_to_image(result=result) # resize back to original image shape. cv2.resize expects shape # in (width, height), [::-1] reverses the (height, width) shape to match this. result_image = cv2.resize(result_image, image.shape[:2][::-1]) st.subheader('Output Image') st.image(result_image,use_column_width= True) if app_mode =='Run on Video': st.markdown('Running on Video') use_webcam = st.sidebar.button('Use Webcam') video_file_buffer = st.sidebar.file_uploader("Upload a video", type=[ "mp4", "mov",'avi','asf', 'm4v' ]) tfflie = tempfile.NamedTemporaryFile(delete=False) stop_button = st.sidebar.button('Stop Processing') if stop_button: st.stop() if not video_file_buffer: if use_webcam: vid = cv2.VideoCapture(0) else: vid = cv2.VideoCapture(DEMO_VIDEO) tfflie.name = DEMO_VIDEO else: tfflie.write(video_file_buffer.read()) vid = cv2.VideoCapture(tfflie.name) width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS))#codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) codec = cv2.VideoWriter_fourcc('X','V','I','D') out = cv2.VideoWriter('output_depth.mp4', codec, fps, (width, height)) start_time = time.perf_counter() total_inference_duration = 0 stframe = st.empty() SCALE_OUTPUT = 1 st.markdown("**Frame Rate**") kpi1_text = st.markdown("0") save_video = st.checkbox('Save video') while vid.isOpened(): ret, image = vid.read() new_time = time.time() input_video_frame_height, input_video_frame_width = image.shape[:2] target_frame_height = int(input_video_frame_height * SCALE_OUTPUT) target_frame_width = int(input_video_frame_width * SCALE_OUTPUT) if not ret: vid.release() break resized_image = cv2.resize(src=image, dsize=(network_image_height, network_image_width)) # reshape image to network input shape NCHW input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0) inference_start_time = time.perf_counter() result = exec_net.infer(inputs={input_key: input_image})[output_key] inference_stop_time = time.perf_counter() inference_duration = inference_stop_time - inference_start_time total_inference_duration += inference_duration result_frame = to_rgb(convert_result_to_image(result)) # Resize image and result to target frame shape result_frame = cv2.resize(result_frame, (target_frame_width, target_frame_height)) image = cv2.resize(image, (target_frame_width, target_frame_height)) # Put image and result side by side stacked_frame = np.vstack((image, result_frame)) if save_video: out.write(stacked_frame) stframe.image(stacked_frame,channels = 'BGR',use_column_width=True) fps = 1.0/(time.time() - new_time) kpi1_text.write(f"