How to conduct exl2 measurements?
#1
by
icoderzqliu
- opened
What are the details of conducting exl2 measurements?
Just measure as usual:
python convert.py -i model_dir -o tmp_dir -om model_dir/measurement.json
Then list the layer bpw for the top solution:
diff --git a/conversion/optimize.py b/conversion/optimize.py
index b346855..6d1ebf6 100644
--- a/conversion/optimize.py
+++ b/conversion/optimize.py
@@ -1,6 +1,7 @@
from conversion.qparams import QParams
import math
import itertools
+import json
def optimize(job, save_fn, model):
@@ -207,6 +208,7 @@ def optimize(job, save_fn, model):
print(" -- Quantization strategy:")
job["strategy"] = {}
+ layer_bpw = {}
for layer_ in range(num_layers):
k1 = "model.layers." + str(layer_) + ".self_attn"
@@ -214,10 +216,23 @@ def optimize(job, save_fn, model):
p1 = params[layer_ * 2][f_solution[layer_ * 2]]
p2 = params[layer_ * 2 + 1][f_solution[layer_ * 2 + 1]]
+ total_bpw = 0
for (k, p, n) in zip((k1, k2), (p1, p2), (numel_attn, numel_mlp)):
job["strategy"][k] = p
bpw = p["total_bits"] / n
+ total_bpw += bpw
err = 1 - p["accuracy"]
print(f" -- {k:50} {bpw:1.4f} bpw - exp. error: {err:1.8f}")
-
- xx = 0
\ No newline at end of file
+ layer_bpw[layer_] = total_bpw
+
+ t = 0
+ best = []
+ layers = []
+ for i, bpw in reversed(sorted(layer_bpw.items(), key=lambda x: x[1])):
+ print(f"Layer: {i} {bpw}")
+ layers.append(i)
+
+ with open('layer_rank.json', 'w') as f:
+ json.dump(layers, f)
+ xx = 0
Just measure as usual:
python convert.py -i model_dir -o tmp_dir -om model_dir/measurement.json
Then list the layer bpw for the top solution:
diff --git a/conversion/optimize.py b/conversion/optimize.py index b346855..6d1ebf6 100644 --- a/conversion/optimize.py +++ b/conversion/optimize.py @@ -1,6 +1,7 @@ from conversion.qparams import QParams import math import itertools +import json def optimize(job, save_fn, model): @@ -207,6 +208,7 @@ def optimize(job, save_fn, model): print(" -- Quantization strategy:") job["strategy"] = {} + layer_bpw = {} for layer_ in range(num_layers): k1 = "model.layers." + str(layer_) + ".self_attn" @@ -214,10 +216,23 @@ def optimize(job, save_fn, model): p1 = params[layer_ * 2][f_solution[layer_ * 2]] p2 = params[layer_ * 2 + 1][f_solution[layer_ * 2 + 1]] + total_bpw = 0 for (k, p, n) in zip((k1, k2), (p1, p2), (numel_attn, numel_mlp)): job["strategy"][k] = p bpw = p["total_bits"] / n + total_bpw += bpw err = 1 - p["accuracy"] print(f" -- {k:50} {bpw:1.4f} bpw - exp. error: {err:1.8f}") - - xx = 0 \ No newline at end of file + layer_bpw[layer_] = total_bpw + + t = 0 + best = [] + layers = [] + for i, bpw in reversed(sorted(layer_bpw.items(), key=lambda x: x[1])): + print(f"Layer: {i} {bpw}") + layers.append(i) + + with open('layer_rank.json', 'w') as f: + json.dump(layers, f) + xx = 0
Thank you for your reply! Where can I get this convert.py file?