Spaces:
Sleeping
Sleeping
Add VQA
Browse files
- app_vqa.py +8 -2
- prismer/model/modules/roberta.py +0 -17
- prismer_model.py +2 -2
app_vqa.py
CHANGED
@@ -32,11 +32,17 @@ def create_demo():
|
|
32 |
outputs = [answer, depth, edge, normals, segmentation, object_detection, ocr]
|
33 |
|
34 |
# paths = sorted(pathlib.Path('prismer/images').glob('*'))
|
35 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
# gr.Examples(examples=examples,
|
37 |
# inputs=inputs,
|
38 |
# outputs=outputs,
|
39 |
-
# fn=model.
|
40 |
# cache_examples=os.getenv('SYSTEM') == 'spaces')
|
41 |
|
42 |
paths = sorted(pathlib.Path('prismer/images').glob('*'))
|
|
|
32 |
outputs = [answer, depth, edge, normals, segmentation, object_detection, ocr]
|
33 |
|
34 |
# paths = sorted(pathlib.Path('prismer/images').glob('*'))
|
35 |
+
# ex_questions = ['What is the man on the right doing?',
|
36 |
+
# 'What is this person playing?',
|
37 |
+
# 'How many cows in this image?',
|
38 |
+
# 'What is the type of animal in this image?',
|
39 |
+
# 'What toy is it?']
|
40 |
+
#
|
41 |
+
# examples = [[path.as_posix(), 'Prismer-Base', ex_questions[i]] for i, path in enumerate(paths)]
|
42 |
# gr.Examples(examples=examples,
|
43 |
# inputs=inputs,
|
44 |
# outputs=outputs,
|
45 |
+
# fn=model.run_vqa,
|
46 |
# cache_examples=os.getenv('SYSTEM') == 'spaces')
|
47 |
|
48 |
paths = sorted(pathlib.Path('prismer/images').glob('*'))
|
prismer/model/modules/roberta.py
CHANGED
@@ -431,23 +431,6 @@ class RobertaLMHead(nn.Module):
|
|
431 |
|
432 |
|
433 |
def load_decoder(name: str, config: RobertaConfig):
|
434 |
-
# load pre-trained model file
|
435 |
-
if name in ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST:
|
436 |
-
model = RobertaForMaskedLM.from_pretrained(name, cache_dir='cache')
|
437 |
-
else:
|
438 |
-
raise RuntimeError(f"Model {name} not found")
|
439 |
-
|
440 |
-
state_dict = model.state_dict()
|
441 |
-
for key in list(state_dict.keys()):
|
442 |
-
if 'encoder.layer' in key:
|
443 |
-
new_key_ = re.sub(".attention", ".0.attention", key)
|
444 |
-
new_key_ = re.sub(".intermediate", ".0.intermediate", new_key_)
|
445 |
-
if 'attention' not in key:
|
446 |
-
new_key_ = re.sub(".output", ".0.output", new_key_)
|
447 |
-
state_dict[new_key_] = state_dict.pop(key)
|
448 |
-
|
449 |
-
# load pre-trained weights
|
450 |
roberta = RobertaForCausalLMModified(config)
|
451 |
-
roberta.load_state_dict(state_dict, strict=False)
|
452 |
return roberta
|
453 |
|
|
|
431 |
|
432 |
|
433 |
def load_decoder(name: str, config: RobertaConfig):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
roberta = RobertaForCausalLMModified(config)
|
|
|
435 |
return roberta
|
436 |
|
prismer_model.py
CHANGED
@@ -79,7 +79,7 @@ class Model:
|
|
79 |
|
80 |
# load checkpoints
|
81 |
model_name = exp_name.lower().replace('-', '_')
|
82 |
-
if
|
83 |
config = {
|
84 |
'dataset': 'demo',
|
85 |
'data_path': 'prismer/helpers',
|
@@ -94,7 +94,7 @@ class Model:
|
|
94 |
state_dict = torch.load(f'prismer/logging/pretrain_{model_name}/pytorch_model.bin', map_location='cuda:0')
|
95 |
state_dict['expert_encoder.positional_embedding'] = interpolate_pos_embed(state_dict['expert_encoder.positional_embedding'],
|
96 |
len(model.expert_encoder.positional_embedding))
|
97 |
-
elif
|
98 |
config = {
|
99 |
'dataset': 'demo',
|
100 |
'data_path': 'prismer/helpers',
|
|
|
79 |
|
80 |
# load checkpoints
|
81 |
model_name = exp_name.lower().replace('-', '_')
|
82 |
+
if mode == 'caption':
|
83 |
config = {
|
84 |
'dataset': 'demo',
|
85 |
'data_path': 'prismer/helpers',
|
|
|
94 |
state_dict = torch.load(f'prismer/logging/pretrain_{model_name}/pytorch_model.bin', map_location='cuda:0')
|
95 |
state_dict['expert_encoder.positional_embedding'] = interpolate_pos_embed(state_dict['expert_encoder.positional_embedding'],
|
96 |
len(model.expert_encoder.positional_embedding))
|
97 |
+
elif mode == 'vqa':
|
98 |
config = {
|
99 |
'dataset': 'demo',
|
100 |
'data_path': 'prismer/helpers',
|