modeling script
- .gitignore +15 -0
- modeling.py +8 -7
- ultra/eval.py +153 -0
- ultra/layers.py +1 -1
- ultra/util.py +7 -0
.gitignore
ADDED
@@ -0,0 +1,15 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+output/
+.vscode/
+.DS_Store
+datasets/
+ckpts/
+*.csv
+*.txt
modeling.py
CHANGED
@@ -1,9 +1,10 @@
 import os
 import sys
-from transformers import PretrainedConfig, PreTrainedModel
+from transformers import PretrainedConfig, PreTrainedModel
 sys.path.append(os.path.dirname(os.path.dirname(__file__)))
 from ultra.models import Ultra
-import
+from ultra.datasets import WN18RR, CoDExSmall, FB15k237, FB15k237Inductive
+from ultra.eval import test
 
 
 class UltraConfig(PretrainedConfig):

@@ -58,8 +59,8 @@ class UltraLinkPrediction(PreTrainedModel):
 
 if __name__ == "__main__":
 
-
-
-
-
-
+    model = UltraLinkPrediction.from_pretrained("mgalkin/ultra_3g")
+    dataset = CoDExSmall(root="./datasets/")
+    test(model, mode="test", dataset=dataset, gpus=None)
+    # mrr: 0.472035
+    # hits@10: 0.66849
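
Note: the new `__main__` block doubles as a usage template. A minimal sketch (not part of the commit, assuming it runs inside modeling.py where `UltraLinkPrediction` is defined) of getting a full metrics dict out of the same entry point, using only names the diff itself imports:

    # hypothetical extension of the __main__ above, not part of the commit
    from ultra.datasets import FB15k237
    from ultra.eval import test

    model = UltraLinkPrediction.from_pretrained("mgalkin/ultra_3g")
    dataset = FB15k237(root="./datasets/")
    # return_metrics=True yields a {metric_name: score} dict instead of the bare MRR;
    # "mr", "mrr", and "hits@K" are the metric names eval.py understands
    metrics = test(model, mode="test", dataset=dataset, gpus=None,
                   eval_metrics=["mr", "mrr", "hits@1", "hits@10"], return_metrics=True)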
ultra/eval.py
ADDED
@@ -0,0 +1,153 @@
+import math
+
+import torch
+from torch import distributed as dist
+from torch.utils import data as torch_data
+from torch_geometric.data import Data
+
+from ultra import tasks, util
+
+
+TRANSDUCTIVE = ("WordNet18RR", "RelLinkPredDataset", "CoDExSmall", "CoDExMedium", "CoDExLarge",
+                "YAGO310", "NELL995", "ConceptNet100k", "DBpedia100k", "Hetionet", "AristoV4",
+                "WDsinger", "NELL23k", "FB15k237_10", "FB15k237_20", "FB15k237_50")
+
+
+def get_filtered_data(dataset, mode):
+    train_data, valid_data, test_data = dataset[0], dataset[1], dataset[2]
+    ds_name = dataset.__class__.__name__
+
+    if ds_name in TRANSDUCTIVE:
+        filtered_data = Data(edge_index=dataset._data.target_edge_index, edge_type=dataset._data.target_edge_type, num_nodes=dataset[0].num_nodes)
+    else:
+        if "ILPC" in ds_name or "Ingram" in ds_name:
+            full_inference_edges = torch.cat([valid_data.edge_index, valid_data.target_edge_index, test_data.target_edge_index], dim=1)
+            full_inference_etypes = torch.cat([valid_data.edge_type, valid_data.target_edge_type, test_data.target_edge_type])
+            filtered_data = Data(edge_index=full_inference_edges, edge_type=full_inference_etypes, num_nodes=test_data.num_nodes)
+        else:
+            # test filtering graph: inference edges + test edges
+            full_inference_edges = torch.cat([test_data.edge_index, test_data.target_edge_index], dim=1)
+            full_inference_etypes = torch.cat([test_data.edge_type, test_data.target_edge_type])
+            if mode == "test":
+                filtered_data = Data(edge_index=full_inference_edges, edge_type=full_inference_etypes, num_nodes=test_data.num_nodes)
+            else:
+                # validation filtering graph: train edges + validation edges
+                filtered_data = Data(
+                    edge_index=torch.cat([train_data.edge_index, valid_data.target_edge_index], dim=1),
+                    edge_type=torch.cat([train_data.edge_type, valid_data.target_edge_type])
+                )
+
+    return filtered_data
+
+
+@torch.no_grad()
+def test(model, mode, dataset, batch_size=32, eval_metrics=["mrr", "hits@10"], gpus=None, return_metrics=False):
+    logger = util.get_root_logger()
+    test_data = dataset[1] if mode == "valid" else dataset[2]
+    filtered_data = get_filtered_data(dataset, mode)
+
+    device = util.get_devices(gpus)
+    world_size = util.get_world_size()
+    rank = util.get_rank()
+
+    test_triplets = torch.cat([test_data.target_edge_index, test_data.target_edge_type.unsqueeze(0)]).t()
+    sampler = torch_data.DistributedSampler(test_triplets, world_size, rank)
+    test_loader = torch_data.DataLoader(test_triplets, batch_size, sampler=sampler)
+
+    model.eval()
+    rankings = []
+    num_negatives = []
+    tail_rankings, num_tail_negs = [], []  # for explicit tail-only evaluation needed for 5 datasets
+    for batch in test_loader:
+        t_batch, h_batch = tasks.all_negative(test_data, batch)
+        t_pred = model(test_data, t_batch)
+        h_pred = model(test_data, h_batch)
+
+        if filtered_data is None:
+            t_mask, h_mask = tasks.strict_negative_mask(test_data, batch)
+        else:
+            t_mask, h_mask = tasks.strict_negative_mask(filtered_data, batch)
+        pos_h_index, pos_t_index, pos_r_index = batch.t()
+        t_ranking = tasks.compute_ranking(t_pred, pos_t_index, t_mask)
+        h_ranking = tasks.compute_ranking(h_pred, pos_h_index, h_mask)
+        num_t_negative = t_mask.sum(dim=-1)
+        num_h_negative = h_mask.sum(dim=-1)
+
+        rankings += [t_ranking, h_ranking]
+        num_negatives += [num_t_negative, num_h_negative]
+
+        tail_rankings += [t_ranking]
+        num_tail_negs += [num_t_negative]
+
+    ranking = torch.cat(rankings)
+    num_negative = torch.cat(num_negatives)
+    all_size = torch.zeros(world_size, dtype=torch.long, device=device)
+    all_size[rank] = len(ranking)
+
+    # ugly repetitive code for tail-only ranks processing
+    tail_ranking = torch.cat(tail_rankings)
+    num_tail_neg = torch.cat(num_tail_negs)
+    all_size_t = torch.zeros(world_size, dtype=torch.long, device=device)
+    all_size_t[rank] = len(tail_ranking)
+    if world_size > 1:
+        dist.all_reduce(all_size, op=dist.ReduceOp.SUM)
+        dist.all_reduce(all_size_t, op=dist.ReduceOp.SUM)
+
+    # obtaining all ranks
+    cum_size = all_size.cumsum(0)
+    all_ranking = torch.zeros(all_size.sum(), dtype=torch.long, device=device)
+    all_ranking[cum_size[rank] - all_size[rank]: cum_size[rank]] = ranking
+    all_num_negative = torch.zeros(all_size.sum(), dtype=torch.long, device=device)
+    all_num_negative[cum_size[rank] - all_size[rank]: cum_size[rank]] = num_negative
+
+    # the same for tails-only ranks
+    cum_size_t = all_size_t.cumsum(0)
+    all_ranking_t = torch.zeros(all_size_t.sum(), dtype=torch.long, device=device)
+    all_ranking_t[cum_size_t[rank] - all_size_t[rank]: cum_size_t[rank]] = tail_ranking
+    all_num_negative_t = torch.zeros(all_size_t.sum(), dtype=torch.long, device=device)
+    all_num_negative_t[cum_size_t[rank] - all_size_t[rank]: cum_size_t[rank]] = num_tail_neg
+    if world_size > 1:
+        dist.all_reduce(all_ranking, op=dist.ReduceOp.SUM)
+        dist.all_reduce(all_num_negative, op=dist.ReduceOp.SUM)
+        dist.all_reduce(all_ranking_t, op=dist.ReduceOp.SUM)
+        dist.all_reduce(all_num_negative_t, op=dist.ReduceOp.SUM)
+
+    metrics = {}
+    if rank == 0:
+        for metric in eval_metrics:
+            if "-tail" in metric:
+                _metric_name, direction = metric.split("-")
+                if direction != "tail":
+                    raise ValueError("Only tail metric is supported in this mode")
+                _ranking = all_ranking_t
+                _num_neg = all_num_negative_t
+            else:
+                _ranking = all_ranking
+                _num_neg = all_num_negative
+                _metric_name = metric
+
+            if _metric_name == "mr":
+                score = _ranking.float().mean()
+            elif _metric_name == "mrr":
+                score = (1 / _ranking.float()).mean()
+            elif _metric_name.startswith("hits@"):
+                values = _metric_name[5:].split("_")
+                threshold = int(values[0])
+                if len(values) > 1:
+                    num_sample = int(values[1])
+                    # unbiased estimation
+                    fp_rate = (_ranking - 1).float() / _num_neg
+                    score = 0
+                    for i in range(threshold):
+                        # choose i false positive from num_sample - 1 negatives
+                        num_comb = math.factorial(num_sample - 1) / \
+                                   math.factorial(i) / math.factorial(num_sample - i - 1)
+                        score += num_comb * (fp_rate ** i) * ((1 - fp_rate) ** (num_sample - i - 1))
+                    score = score.mean()
+                else:
+                    score = (_ranking <= threshold).float().mean()
+            logger.warning("%s: %g" % (metric, score))
+            metrics[metric] = score
+    mrr = (1 / all_ranking.float()).mean()
+
+    return mrr if not return_metrics else metrics
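
Note on the `hits@K_numsample` branch above: when ranks were computed against only `num_sample - 1` sampled negatives, a plain `(ranking <= K)` check would be biased, so the code estimates the probability that fewer than K sampled negatives outrank the positive, treating `(rank - 1) / num_neg` as the per-negative false-positive rate. A standalone sketch of the same estimator (hypothetical helper, not part of the commit; `math.comb` stands in for the factorial ratio):

    import math

    def sampled_hits_at_k(rank, num_neg, k, num_sample):
        # hypothetical helper mirroring eval.py's sampled hits@k estimator
        # probability that one uniformly sampled negative outranks the positive
        fp_rate = (rank - 1) / num_neg
        # P(fewer than k of the num_sample - 1 sampled negatives beat the positive)
        # = sum_{i=0}^{k-1} C(num_sample - 1, i) * fp^i * (1 - fp)^(num_sample - 1 - i)
        return sum(math.comb(num_sample - 1, i)
                   * fp_rate ** i * (1 - fp_rate) ** (num_sample - 1 - i)
                   for i in range(k))

    print(sampled_hits_at_k(rank=1, num_neg=1000, k=10, num_sample=50))    # exactly 1.0
    print(sampled_hits_at_k(rank=200, num_neg=1000, k=10, num_sample=50))  # strictly between 0 and 1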
ultra/layers.py
CHANGED
@@ -177,7 +177,7 @@ class GeneralizedRelationalConv(MessagePassing):
         # fused computation of message and aggregate steps with the custom rspmm cuda kernel
         # speed up computation by several times
         # reduce memory complexity from O(|E|d) to O(|V|d), so we can apply it to larger graphs
-        from .rspmm import generalized_rspmm
+        from ultra.rspmm.rspmm import generalized_rspmm
 
         batch_size, num_node = input.shape[:2]
         input = input.transpose(0, 1).flatten(1)
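
The comment block explains why the fused kernel matters; a toy illustration of the memory argument (hypothetical shapes, not the kernel's actual API):

    import torch

    # illustrative sizes only, not taken from the repo
    num_nodes, num_edges, dim = 1000, 50000, 64
    x = torch.randn(num_nodes, dim)
    edge_index = torch.randint(num_nodes, (2, num_edges))  # row 0: source, row 1: target
    edge_weight = torch.randn(num_edges, dim)              # per-edge relational transform

    # naive message passing materializes one message per edge: O(|E| * d) peak memory
    messages = x[edge_index[0]] * edge_weight
    out = torch.zeros(num_nodes, dim).index_add_(0, edge_index[1], messages)

    # a fused kernel like generalized_rspmm does the multiply and the scatter
    # inside one pass, so only the O(|V| * d) output buffer is ever allocated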
ultra/util.py
CHANGED
@@ -109,6 +109,13 @@ def get_device(cfg):
         device = torch.device("cpu")
     return device
 
+def get_devices(gpus):
+    if gpus is not None:
+        device = torch.device(gpus[get_rank()])
+    else:
+        device = torch.device("cpu")
+    return device
+
 
 def create_working_directory(cfg):
     file_name = "working_dir.tmp"
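
A quick usage sketch for the new helper (assuming a single-process run, where `get_rank()` returns 0):

    from ultra import util

    device = util.get_devices(None)  # cpu, matching the gpus=None default in eval.py
    device = util.get_devices([0])   # cuda:0 for rank 0 when a GPU list is passed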