freddyaboulton HF staff committed on
Commit
4467a7b
1 Parent(s): 619c27a
Files changed (2) hide show
  1. app.py +12 -11
  2. requirements.txt +1 -0
app.py CHANGED
@@ -5,23 +5,25 @@ from PIL import Image
5
  import torch
6
  import time
7
  import numpy as np
 
8
 
9
  from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
10
 
11
  from draw_boxes import draw_bounding_boxes
12
 
13
  image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
14
- model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
15
 
16
 
17
- SUBSAMPLE = 10
18
 
19
  @spaces.GPU
20
  def stream_object_detection(video, conf_threshold):
21
  cap = cv2.VideoCapture(video)
22
 
23
- video_codec = cv2.VideoWriter_fourcc(*"x264") # type: ignore
24
  fps = int(cap.get(cv2.CAP_PROP_FPS))
 
25
  desired_fps = fps // SUBSAMPLE
26
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
27
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2
@@ -29,9 +31,8 @@ def stream_object_detection(video, conf_threshold):
29
  iterating, frame = cap.read()
30
 
31
  n_frames = 0
32
- n_chunks = 0
33
 
34
- name = f"output_{n_chunks}.ts"
35
  segment_file = cv2.VideoWriter(name, video_codec, desired_fps, (width, height)) # type: ignore
36
  batch = []
37
 
@@ -41,15 +42,16 @@ def stream_object_detection(video, conf_threshold):
41
  if n_frames % SUBSAMPLE == 0:
42
  batch.append(frame)
43
  if len(batch) == 2 * desired_fps:
44
- inputs = image_processor(images=batch, return_tensors="pt")
45
 
46
  print(f"starting batch of size {len(batch)}")
47
  start = time.time()
48
  with torch.no_grad():
49
  outputs = model(**inputs)
50
  end = time.time()
51
- print("time taken ", end - start)
52
 
 
53
  boxes = image_processor.post_process_object_detection(
54
  outputs,
55
  target_sizes=torch.tensor([(height, width)] * len(batch)),
@@ -57,7 +59,6 @@ def stream_object_detection(video, conf_threshold):
57
 
58
  for i, (array, box) in enumerate(zip(batch, boxes)):
59
  pil_image = draw_bounding_boxes(Image.fromarray(array), box, model, conf_threshold)
60
- pil_image.save(f"batch_{n_chunks}_detection_{i}.png")
61
  frame = np.array(pil_image)
62
  # Convert RGB to BGR
63
  frame = frame[:, :, ::-1].copy()
@@ -66,9 +67,9 @@ def stream_object_detection(video, conf_threshold):
66
  batch = []
67
  segment_file.release()
68
  yield name
69
- n_frames = 0
70
- n_chunks += 1
71
- name = f"output_{n_chunks}.ts"
72
  segment_file = cv2.VideoWriter(name, video_codec, desired_fps, (width, height)) # type: ignore
73
 
74
  iterating, frame = cap.read()
 
5
  import torch
6
  import time
7
  import numpy as np
8
+ import uuid
9
 
10
  from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
11
 
12
  from draw_boxes import draw_bounding_boxes
13
 
14
  image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
15
+ model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd").to("cuda")
16
 
17
 
18
+ SUBSAMPLE = 2
19
 
20
  @spaces.GPU
21
  def stream_object_detection(video, conf_threshold):
22
  cap = cv2.VideoCapture(video)
23
 
24
+ video_codec = cv2.VideoWriter_fourcc(*"mp4v") # type: ignore
25
  fps = int(cap.get(cv2.CAP_PROP_FPS))
26
+
27
  desired_fps = fps // SUBSAMPLE
28
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
29
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2
 
31
  iterating, frame = cap.read()
32
 
33
  n_frames = 0
 
34
 
35
+ name = f"output_{uuid.uuid4()}.mp4"
36
  segment_file = cv2.VideoWriter(name, video_codec, desired_fps, (width, height)) # type: ignore
37
  batch = []
38
 
 
42
  if n_frames % SUBSAMPLE == 0:
43
  batch.append(frame)
44
  if len(batch) == 2 * desired_fps:
45
+ inputs = image_processor(images=batch, return_tensors="pt").to("cuda")
46
 
47
  print(f"starting batch of size {len(batch)}")
48
  start = time.time()
49
  with torch.no_grad():
50
  outputs = model(**inputs)
51
  end = time.time()
52
+ print("time taken for inference", end - start)
53
 
54
+ start = time.time()
55
  boxes = image_processor.post_process_object_detection(
56
  outputs,
57
  target_sizes=torch.tensor([(height, width)] * len(batch)),
 
59
 
60
  for i, (array, box) in enumerate(zip(batch, boxes)):
61
  pil_image = draw_bounding_boxes(Image.fromarray(array), box, model, conf_threshold)
 
62
  frame = np.array(pil_image)
63
  # Convert RGB to BGR
64
  frame = frame[:, :, ::-1].copy()
 
67
  batch = []
68
  segment_file.release()
69
  yield name
70
+ end = time.time()
71
+ print("time taken for processing boxes", end - start)
72
+ name = f"output_{uuid.uuid4()}.mp4"
73
  segment_file = cv2.VideoWriter(name, video_codec, desired_fps, (width, height)) # type: ignore
74
 
75
  iterating, frame = cap.read()
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  safetensors==0.4.3
2
  opencv-python
3
  torch
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu113
2
  safetensors==0.4.3
3
  opencv-python
4
  torch