# hop_interventions.py
# Author: Julie Kallini
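#
# Runs interchange (activation-patching) interventions on GPT-2 models
# trained on hop-perturbed BabyLM data: for every checkpoint, layer, and
# token position, the block-output representation from a plural "source"
# example is patched into the corresponding singular "base" example, and
# the probabilities of the singular/plural marker tokens are recorded.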
# For importing utils
import sys
sys.path.append("..")
# align-transformers
PATH_TO_ALIGN_TRANSFORMERS = "/nlp/scr/kallini/align-transformers/"
sys.path.append(PATH_TO_ALIGN_TRANSFORMERS)
import pandas as pd
from models.utils import embed_to_distrib
from models.configuration_alignable_model import AlignableRepresentationConfig, AlignableConfig
from models.alignable_base import AlignableModel
from models.interventions import VanillaIntervention
from utils import CHECKPOINT_READ_PATH, marker_sg_token, marker_pl_token, \
    PERTURBATIONS, PAREN_MODELS
from tqdm import tqdm
from transformers import GPT2Model
from gpt2_no_positional_encoding_model import GPT2NoPositionalEncodingModel
import os
import torch
import argparse
MAX_TRAINING_STEPS = 3000
CHECKPOINTS = list(range(100, MAX_TRAINING_STEPS+1, 100))
def simple_position_config(model_type, intervention_type, layer):
    alignable_config = AlignableConfig(
        alignable_model_type=model_type,
        alignable_representations=[
            AlignableRepresentationConfig(
                layer,              # layer
                intervention_type,  # intervention type
                "pos",              # intervention unit
                1                   # max number of unit
            ),
        ],
        alignable_interventions_type=VanillaIntervention,
    )
    return alignable_config
def get_model(perturbation_type, train_set, seed, paren_model, ckpt, no_pos_encodings=False):
    # Get path to model (`device` is defined in the __main__ block below)
    nps_suffix = "_no_positional_encodings" if no_pos_encodings else ""
    model = f"babylm_{perturbation_type}_{train_set}_{paren_model}{nps_suffix}_seed{seed}"
    model_path = f"{CHECKPOINT_READ_PATH}/babylm_{perturbation_type}_{train_set}_{paren_model}{nps_suffix}/{model}/runs/{model}/checkpoint-{ckpt}"

    # Load appropriate GPT-2 model
    if no_pos_encodings:
        return GPT2NoPositionalEncodingModel.from_pretrained(model_path).to(device)
    else:
        return GPT2Model.from_pretrained(model_path).to(device)
def run_interventions(model, base_input_ids, source_input_ids):
    # Marker tokens whose probabilities we track after each intervention
    tokens = [marker_sg_token, marker_pl_token]
    data = []
    BATCH_SIZE = 16
    for batch_i in tqdm(range(0, len(base_input_ids), BATCH_SIZE)):

        # Get base and source batches
        base_batch = base_input_ids[batch_i:batch_i+BATCH_SIZE]
        source_batch = source_input_ids[batch_i:batch_i+BATCH_SIZE]

        # Iterate over GPT-2 layers
        for layer_i in range(model.config.n_layer):

            # Get block_output config for this layer
            alignable_config = simple_position_config(
                type(model), "block_output", layer_i)
            alignable = AlignableModel(alignable_config, model)

            # Iterate over token positions, patching the source activation
            # into the base run at position pos_i
            for pos_i in range(len(base_batch[0])):
                _, counterfactual_outputs = alignable(
                    {"input_ids": torch.tensor(base_batch).to(device)},
                    [{"input_ids": torch.tensor(source_batch).to(device)}],
                    {"sources->base": ([[[pos_i]] * len(base_batch)],
                                       [[[pos_i]] * len(base_batch)])}
                )
                distrib = embed_to_distrib(
                    model, counterfactual_outputs.last_hidden_state,
                    logits=False
                )

                # Record marker-token probabilities at the final position
                for i in range(len(base_batch)):
                    for token in tokens:
                        data.append({
                            'example': batch_i + i,
                            'token': token,
                            'prob': float(distrib[i][-1][token]),
                            'layer': layer_i,
                            'pos': pos_i,
                            'type': "block_output"
                        })
    return pd.DataFrame(data)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog='Run intervention tests for subject-verb agreement on hop models',
        description='Run interventions for subject-verb agreement on hop models')
    parser.add_argument('perturbation_type',
                        default='all',
                        const='all',
                        nargs='?',
                        choices=PERTURBATIONS.keys(),
                        help='Perturbation function used to transform BabyLM dataset')
    parser.add_argument('train_set',
                        default='all',
                        const='all',
                        nargs='?',
                        choices=["100M", "10M"],
                        help='BabyLM train set')
    parser.add_argument('random_seed', type=int, help="Random seed")
    parser.add_argument('paren_model',
                        default='all',
                        const='all',
                        nargs='?',
                        choices=list(PAREN_MODELS.keys()) + ["randinit"],
                        help='Parenthesis model')
    parser.add_argument('-np', '--no_pos_encodings', action='store_true',
                        help="Use GPT-2 models trained with no positional encodings")

    # Get args
    args = parser.parse_args()
if "hop" not in args.perturbation_type:
raise Exception(
"'{args.perturbation_type}' is not a valid hop perturbation")
    # Get examples to run interventions
    data_df = pd.read_csv("hop_agreement_data.csv")
    bases = [[int(tok) for tok in seq.split()]
             for seq in list(data_df["Singular"])]
    sources = [[int(tok) for tok in seq.split()]
               for seq in list(data_df["Plural"])]

    # Only get first three tokens of each example for control model
    if args.perturbation_type == "hop_control":
        bases = [row[:3] for row in bases]
        sources = [row[:3] for row in sources]
    # Get model and run intervention experiments
    device = "cuda" if torch.cuda.is_available() else "cpu"
    result_df = None
    for ckpt in CHECKPOINTS:
        print(f"Checkpoint: {ckpt}")
        model = get_model(args.perturbation_type, args.train_set,
                          args.random_seed, args.paren_model, ckpt,
                          args.no_pos_encodings)
        if result_df is None:
            result_df = run_interventions(model, bases, sources)
            result_df["ckpt"] = ckpt
        else:
            ckpt_df = run_interventions(model, bases, sources)
            ckpt_df["ckpt"] = ckpt
            result_df = pd.concat((result_df, ckpt_df), axis=0)
    # Create directory for results
    nps = '_no_pos_encodings' if args.no_pos_encodings else ''
    result_directory = f"hop_intervention_results/{args.perturbation_type}_{args.train_set}{nps}/"
    os.makedirs(result_directory, exist_ok=True)

    # Write results
    result_df.to_csv(result_directory + f"{args.paren_model}_seed{args.random_seed}.csv", index=False)
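
# Example invocation (argument values are illustrative; valid choices for
# perturbation_type and paren_model come from PERTURBATIONS and PAREN_MODELS
# in utils):
#
#   python hop_interventions.py hop_control 100M 0 randinit --no_pos_encodings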