Spaces:
Runtime error
Runtime error
import sys | |
import time | |
from pathlib import Path | |
import cv2 | |
from openvino.inference_engine import IECore | |
import matplotlib.cm | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import streamlit as st | |
from PIL import Image | |
import tempfile | |
# Bundled sample inputs, used whenever the user has not uploaded a file.
DEMO_IMAGE = 'dog-new.jpg'  # fallback for "Run on Image"
DEMO_VIDEO = 'demo.mp4'  # fallback for "Run on Video"
def normalize_minmax(data):
    """Linearly rescale *data* so its minimum maps to 0 and its maximum to 1.

    Args:
        data: Array-like object exposing ``min()``/``max()`` and supporting
            element-wise arithmetic (a NumPy array in this script).

    Returns:
        ``(data - min) / (max - min)``. When every element is equal the
        range is zero; the result is then all zeros instead of the NaNs a
        0/0 division would produce.

    Raises:
        ValueError: Propagated from ``min()``/``max()`` when *data* is an
            empty NumPy array.
    """
    lowest = data.min()
    span = data.max() - lowest
    if span == 0:
        # Flat input: ``data - lowest`` is already all zeros, so dividing by
        # one keeps the usual result dtype while avoiding a 0/0 -> NaN.
        span = 1
    return (data - lowest) / span
def convert_result_to_image(result, colormap="inferno"):
    """Turn a raw network output into a colour-mapped 8-bit RGB image.

    Args:
        result: Network output with a leading axis of length 1; that axis is
            removed with ``squeeze(0)`` and the remaining 2-D map is coloured.
        colormap: Name of the Matplotlib colormap to apply.

    Returns:
        ``np.uint8`` array of shape ``(H, W, 3)`` holding RGB values in
        0..255 (the alpha channel produced by the colormap is dropped).
    """
    # ``matplotlib.cm.get_cmap`` was deprecated in Matplotlib 3.7 and removed
    # in 3.9; ``pyplot.get_cmap`` is available in both old and new releases.
    cmap = plt.get_cmap(colormap)
    depth_map = normalize_minmax(result.squeeze(0))
    # The colormap returns RGBA floats in [0, 1]; keep RGB and scale to bytes.
    rgb = cmap(depth_map)[:, :, :3] * 255
    return rgb.astype(np.uint8)
def to_rgb(image_data) -> np.ndarray:
    """Return *image_data* with its first and third colour channels swapped.

    The conversion code is ``cv2.COLOR_BGR2RGB``; because the swap is
    symmetric, the same call also turns an RGB image into BGR.
    """
    swapped = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
    return swapped
# ---- Page header and sidebar headings --------------------------------------
st.title("Depth Estimation App")
st.sidebar.title('Depth Estimation')
st.sidebar.subheader('Parameters')

# ---- Load the MiDaS network and compile it for the target device -----------
DEVICE = "CPU"
MODEL_FILE = "models/MiDaS_small.xml"
model_xml_path = Path(MODEL_FILE)
# The weights file sits next to the .xml description, with a .bin suffix.
model_bin_path = model_xml_path.with_suffix(".bin")

ie = IECore()
net = ie.read_network(model=model_xml_path, weights=model_bin_path)
exec_net = ie.load_network(network=net, device_name=DEVICE)

# Only the first input and the first output of the network are used.
input_key = list(exec_net.input_info)[0]
output_key = list(exec_net.outputs)[0]

# The last two input dimensions are taken as (height, width).
network_input_shape = exec_net.input_info[input_key].tensor_desc.dims
network_image_height, network_image_width = network_input_shape[2:]

# ---- Mode selector ----------------------------------------------------------
app_mode = st.sidebar.selectbox(
    'Choose the App mode',
    ['Run on Image', 'Run on Video'],
    index=0,
)
if app_mode == "Run on Image":
    # Single-image mode: run the network once on an uploaded (or demo) image
    # and show the colour-mapped result.
    st.markdown('Running on Image')
    st.sidebar.text('Params for Image')
    # Inject CSS that pins the sidebar width. The string is kept flush-left
    # so Markdown does not treat it as an indented code block.
    st.markdown(
        """
<style>
[data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
width: 400px;
}
[data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
width: 400px;
margin-left: -400px;
}
</style>
""",
        unsafe_allow_html=True,
    )

    img_file_buffer = st.sidebar.file_uploader(
        "Upload an image", type=["jpg", "jpeg", 'png'])
    image_source = img_file_buffer if img_file_buffer is not None else DEMO_IMAGE
    # Force exactly three channels: PNG uploads may carry an alpha channel and
    # greyscale files have a single channel, either of which would break the
    # HWC -> CHW transpose / 3-channel network input below.
    image = np.array(Image.open(image_source).convert("RGB"))

    st.sidebar.text('Original Image')
    st.sidebar.image(image)

    # cv2.resize takes dsize as (width, height).
    resized_image = cv2.resize(
        src=image, dsize=(network_image_width, network_image_height))
    # Reshape image to network input shape NCHW.
    input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0)
    result = exec_net.infer(inputs={input_key: input_image})[output_key]

    # Convert the network result (disparity map) to an image that shows
    # distance as colours.
    result_image = convert_result_to_image(result=result)
    # Resize back to the original image size. image.shape[:2] is
    # (height, width); [::-1] flips it to the (width, height) cv2 expects.
    result_image = cv2.resize(result_image, image.shape[:2][::-1])

    st.subheader('Output Image')
    st.image(result_image, use_column_width=True)
if app_mode == 'Run on Video':
    # Video mode: run the network frame by frame on an uploaded file, the
    # webcam, or the bundled demo clip, and show input and depth side by side.
    st.markdown('Running on Video')
    use_webcam = st.sidebar.button('Use Webcam')
    video_file_buffer = st.sidebar.file_uploader(
        "Upload a video", type=["mp4", "mov", 'avi', 'asf', 'm4v'])
    stop_button = st.sidebar.button('Stop Processing')
    if stop_button:
        st.stop()

    # Choose the frame source. An uploaded file takes precedence, then the
    # webcam button, then the demo clip.
    temp_video_path = None
    if video_file_buffer:
        # OpenCV reads from a path, so spill the upload to disk. Leaving the
        # ``with`` block closes (and therefore flushes) the file before
        # VideoCapture opens it; delete=False keeps it on disk until the
        # explicit cleanup below.
        with tempfile.NamedTemporaryFile(delete=False) as temp_video:
            temp_video.write(video_file_buffer.read())
        temp_video_path = Path(temp_video.name)
        vid = cv2.VideoCapture(str(temp_video_path))
    elif use_webcam:
        vid = cv2.VideoCapture(0)
    else:
        vid = cv2.VideoCapture(DEMO_VIDEO)

    # CAP_PROP_FPS may be reported as 0 when the backend does not know the
    # rate; fall back to a nominal value so the writer gets a usable rate.
    source_fps = int(vid.get(cv2.CAP_PROP_FPS)) or 25
    codec = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')

    stframe = st.empty()
    SCALE_OUTPUT = 1
    st.markdown("**Frame Rate**")
    kpi1_text = st.markdown("0")
    save_video = st.checkbox('Save video')

    out = None  # VideoWriter, created on the first frame that is saved
    try:
        while vid.isOpened():
            ret, image = vid.read()
            # Check the read result before touching the frame: at the end of
            # the stream ``image`` is None.
            if not ret:
                break
            frame_start = time.perf_counter()

            input_video_frame_height, input_video_frame_width = image.shape[:2]
            target_frame_height = int(input_video_frame_height * SCALE_OUTPUT)
            target_frame_width = int(input_video_frame_width * SCALE_OUTPUT)

            # cv2.resize takes dsize as (width, height).
            resized_image = cv2.resize(
                src=image, dsize=(network_image_width, network_image_height))
            # Reshape image to network input shape NCHW.
            input_image = np.expand_dims(
                np.transpose(resized_image, (2, 0, 1)), 0)
            result = exec_net.infer(inputs={input_key: input_image})[output_key]

            # The colour map is produced as RGB; swap channels so it matches
            # the BGR video frame it is stacked with.
            result_frame = to_rgb(convert_result_to_image(result))
            # Resize image and result to target frame shape.
            result_frame = cv2.resize(
                result_frame, (target_frame_width, target_frame_height))
            image = cv2.resize(image, (target_frame_width, target_frame_height))
            # Put image and result side by side.
            stacked_frame = np.hstack((image, result_frame))

            if save_video:
                if out is None:
                    # Size the writer from the frame that is actually
                    # written: the stacked frame is twice the input width.
                    stacked_height, stacked_width = stacked_frame.shape[:2]
                    out = cv2.VideoWriter(
                        'output_depth.mp4', codec, source_fps,
                        (stacked_width, stacked_height))
                out.write(stacked_frame)

            stframe.image(stacked_frame, channels='BGR', use_column_width=True)

            elapsed = time.perf_counter() - frame_start
            frame_rate = 1.0 / elapsed if elapsed > 0 else 0.0
            kpi1_text.write(
                f"<h1 style='text-align: center; color: red;'>{frame_rate:.1f}</h1>",
                unsafe_allow_html=True)
    finally:
        # Release native resources even when the run is interrupted.
        vid.release()
        if out is not None:
            out.release()
        if temp_video_path is not None:
            try:
                temp_video_path.unlink()
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass

    st.success('Video is Processed')
    st.stop()