Removed cuda dependency
app.py CHANGED
@@ -31,7 +31,7 @@ def install_add_dependencies():
     script = file.read()
     return call(script, shell=True)
 
-install_add_dependencies()
+#install_add_dependencies()
 
 class AppSteps(Enum):
     JUST_TEXT = 1
@@ -63,9 +63,6 @@ def get_args_parser():
 
     # training parameters
    parser.add_argument("--note", default="", help="add some notes to the experiment")
-    parser.add_argument(
-        "--device", default="cuda", help="device to use for training / testing"
-    )
    parser.add_argument("--resume", default="", help="resume from checkpoint")
    parser.add_argument(
        "--pretrain_model_path",
@@ -106,6 +103,10 @@ def get_args_parser():
 @spaces.GPU
 # Get counting model.
 def build_model_and_transforms(args):
+    if torch.cuda.is_available():
+        args.device = torch.device('cuda')
+    else:
+        args.device = torch.device('cpu')
     normalize = T.Compose(
         [T.ToTensor(), T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
     )
@@ -208,25 +209,26 @@ def get_ind_to_filter(text, word_ids, keywords):
 
 @spaces.GPU
 def count(image, text, prompts, state):
+    global args
     print("state: " + str(state))
     keywords = "" # do not handle this for now
     # Handle no prompt case.
     if prompts is None:
         prompts = {"image": image, "points": []}
     input_image, _ = transform(image, {"exemplars": torch.tensor([])})
-    input_image = input_image.unsqueeze(0).cuda()
+    input_image = input_image.unsqueeze(0).to(args.device)
     exemplars = get_box_inputs(prompts["points"])
     print(exemplars)
     input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
-    input_image_exemplars = input_image_exemplars.unsqueeze(0).cuda()
-    exemplars = [exemplars["exemplars"].cuda()]
+    input_image_exemplars = input_image_exemplars.unsqueeze(0).to(args.device)
+    exemplars = [exemplars["exemplars"].to(args.device)]
 
     with torch.no_grad():
         model_output = model(
             nested_tensor_from_tensor_list(input_image),
             nested_tensor_from_tensor_list(input_image_exemplars),
             exemplars,
-            [torch.tensor([0]).cuda() for _ in range(len(input_image))],
+            [torch.tensor([0]).to(args.device) for _ in range(len(input_image))],
             captions=[text + " ."] * len(input_image),
         )
 
@@ -297,24 +299,26 @@ def count(image, text, prompts, state):
 
 @spaces.GPU
 def count_main(image, text, prompts):
+    global args
     keywords = "" # do not handle this for now
     # Handle no prompt case.
     if prompts is None:
         prompts = {"image": image, "points": []}
     input_image, _ = transform(image, {"exemplars": torch.tensor([])})
-    input_image = input_image.unsqueeze(0).cuda()
+    input_image = input_image.unsqueeze(0).to(args.device)
     exemplars = get_box_inputs(prompts["points"])
     print(exemplars)
     input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
-    input_image_exemplars = input_image_exemplars.unsqueeze(0).cuda()
-    exemplars = [exemplars["exemplars"].cuda()]
+    input_image_exemplars = input_image_exemplars.unsqueeze(0).to(args.device)
+    exemplars = [exemplars["exemplars"].to(args.device)]
 
     with torch.no_grad():
         model_output = model(
             nested_tensor_from_tensor_list(input_image),
             nested_tensor_from_tensor_list(input_image_exemplars),
             exemplars,
-            [torch.tensor([0]).cuda() for _ in range(len(input_image))],
+            [torch.tensor([0]).to(args.device) for _ in range(len(input_image))],
             captions=[text + " ."] * len(input_image),
         )
 
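The commit follows the standard PyTorch device-fallback pattern: resolve the device once from torch.cuda.is_available() and move tensors with .to(device), instead of hard-coding .cuda(), which raises when no CUDA runtime is present. Below is a minimal standalone sketch of that pattern; the names pick_device, run_inference, and the Identity model are illustrative, not objects from app.py.

# Sketch of the device-fallback pattern applied in this commit.
# pick_device/run_inference are hypothetical names, not from app.py.
import torch


def pick_device() -> torch.device:
    # Prefer CUDA when available; fall back to CPU so the app
    # still runs on hardware without a GPU.
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def run_inference(model: torch.nn.Module, image: torch.Tensor) -> torch.Tensor:
    device = pick_device()
    model = model.to(device)
    # .to(device) is a no-op if the tensor already lives on that device,
    # unlike .cuda(), which fails outright on CPU-only machines.
    batch = image.unsqueeze(0).to(device)
    with torch.no_grad():
        return model(batch)


if __name__ == "__main__":
    out = run_inference(torch.nn.Identity(), torch.randn(3, 224, 224))
    print(out.shape, out.device)

One design note: the commit stores the chosen device on the parsed args (args.device) inside build_model_and_transforms and reads it back via global args in the inference functions, which keeps the device decision in one place at model-build time.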