File size: 6,903 Bytes

94011a1

# hop_interventions.py
# Author: Julie Kallini

# For importing utils
import sys
sys.path.append("..")

# align-transformers
PATH_TO_ALIGN_TRANSFORMERS = "/nlp/scr/kallini/align-transformers/"
sys.path.append(PATH_TO_ALIGN_TRANSFORMERS)

import pandas as pd
from models.utils import embed_to_distrib
from models.configuration_alignable_model import AlignableRepresentationConfig, AlignableConfig
from models.alignable_base import AlignableModel
from models.interventions import VanillaIntervention
from utils import CHECKPOINT_READ_PATH, marker_sg_token, marker_pl_token, \
    PERTURBATIONS, PAREN_MODELS
from tqdm import tqdm
from transformers import GPT2Model
from gpt2_no_positional_encoding_model import GPT2NoPositionalEncodingModel
import os
import torch
import argparse


# Checkpoint steps to evaluate: every 100th step, up to and including
# MAX_TRAINING_STEPS.
MAX_TRAINING_STEPS = 3000
CHECKPOINTS = [100 * k for k in range(1, MAX_TRAINING_STEPS // 100 + 1)]


def simple_position_config(model_type, intervention_type, layer):
    """Build an AlignableConfig with one position-level representation.

    Args:
        model_type: Forwarded as ``alignable_model_type``.
        intervention_type: Forwarded as the representation's intervention
            type (this file passes "block_output").
        layer: Forwarded as the representation's layer.

    Returns:
        An AlignableConfig containing a single AlignableRepresentationConfig
        and using VanillaIntervention as the interventions type.
    """
    # Positional arguments, in order: layer, intervention type,
    # intervention unit ("pos"), max number of unit (1).
    representation = AlignableRepresentationConfig(
        layer, intervention_type, "pos", 1)

    return AlignableConfig(
        alignable_model_type=model_type,
        alignable_representations=[representation],
        alignable_interventions_type=VanillaIntervention,
    )


def get_model(perturbation_type, train_set, seed, paren_model, ckpt, no_pos_encodings=False):
    """Load one saved checkpoint and move it to the module-level ``device``.

    Args:
        perturbation_type: Perturbation name used in the model directory name.
        train_set: Train set name used in the model directory name.
        seed: Random seed used in the run name.
        paren_model: Parenthesis model name used in the model directory name.
        ckpt: Checkpoint step; selects the ``checkpoint-{ckpt}`` directory.
        no_pos_encodings: When truthy, the "_no_positional_encodings" suffix
            is added to the path and GPT2NoPositionalEncodingModel is loaded
            instead of GPT2Model.

    Returns:
        The loaded model after ``.to(device)``.

    Note:
        ``device`` is read as a global; in this file it is only assigned
        inside the ``if __name__ == "__main__":`` section.
    """
    # Directory-name suffix; empty for the standard model
    suffix = "_no_positional_encodings" if no_pos_encodings else ""

    # Assemble the checkpoint path under CHECKPOINT_READ_PATH
    group_name = f"babylm_{perturbation_type}_{train_set}_{paren_model}{suffix}"
    run_name = f"{group_name}_seed{seed}"
    checkpoint_dir = f"{CHECKPOINT_READ_PATH}/{group_name}/{run_name}/runs/{run_name}/checkpoint-{ckpt}"

    # Pick the matching GPT-2 class and load it
    model_cls = GPT2NoPositionalEncodingModel if suffix else GPT2Model
    return model_cls.from_pretrained(checkpoint_dir).to(device)


def run_interventions(model, base_input_ids, source_input_ids):
    """Run position-wise "block_output" interventions and collect marker probabilities.

    For every batch of 16 examples, every layer in ``model.config.n_layer``
    and every token position of the batch, an AlignableModel is called with
    the base batch, the source batch and the same position index for both.
    The counterfactual last hidden state is passed to ``embed_to_distrib``
    and the values for ``marker_sg_token`` and ``marker_pl_token`` at the
    last sequence position are recorded.

    Args:
        model: Model to intervene on; must expose ``config.n_layer``.
        base_input_ids: List of token-id lists for the base examples.
        source_input_ids: List of token-id lists for the source examples,
            sliced with the same batch boundaries as the base examples.

    Returns:
        A pandas DataFrame with one row per (example, layer, position, token)
        and the columns 'example', 'token', 'prob', 'layer', 'pos', 'type'.

    Note:
        ``device`` is read as a global. The number of positions is taken
        from the first sequence of each batch, and each batch is converted
        with ``torch.tensor`` — so sequences within a batch are presumably
        expected to have equal length (TODO confirm against the data file).
    """
    tokens = [marker_sg_token, marker_pl_token]
    intervention_type = "block_output"

    data = []
    BATCH_SIZE = 16
    for batch_i in tqdm(range(0, len(base_input_ids), BATCH_SIZE)):

        # Get base and source batches
        base_batch = base_input_ids[batch_i:batch_i+BATCH_SIZE]
        source_batch = source_input_ids[batch_i:batch_i+BATCH_SIZE]

        # The input tensors depend only on the batch, so build them and move
        # them to the device once here rather than for every layer/position.
        base_tensor = torch.tensor(base_batch).to(device)
        source_tensor = torch.tensor(source_batch).to(device)
        batch_len = len(base_batch)
        seq_len = len(base_batch[0])

        # Iterate over GPT-2 layers
        for layer_i in range(model.config.n_layer):

            # Get block_output config for this layer
            alignable_config = simple_position_config(
                type(model), intervention_type, layer_i)
            alignable = AlignableModel(alignable_config, model)

            # Iterate over token positions
            for pos_i in range(seq_len):

                # Fresh dicts/lists per call; only the tensors are reused.
                _, counterfactual_outputs = alignable(
                    {"input_ids": base_tensor},
                    [{"input_ids": source_tensor}],
                    {"sources->base": ([[[pos_i]] * batch_len],
                                       [[[pos_i]] * batch_len])}
                )
                distrib = embed_to_distrib(
                    model, counterfactual_outputs.last_hidden_state,
                    logits=False
                )
                for i in range(batch_len):
                    for token in tokens:
                        data.append({
                            'example': batch_i + i,
                            'token': token,
                            # Value at the last sequence position
                            'prob': float(distrib[i][-1][token]),
                            'layer': layer_i,
                            'pos': pos_i,
                            'type': intervention_type
                        })
    return pd.DataFrame(data)


if __name__ == "__main__":
    # Script entry point: parse arguments, run the interventions for every
    # checkpoint in CHECKPOINTS, and write all results to a single CSV file.

    parser = argparse.ArgumentParser(
        prog='Run intervention tests for subject-verb agreement on hop models',
        description='Run interventions for subject-verb agreement on hop models')
    parser.add_argument('perturbation_type',
                        default='all',
                        const='all',
                        nargs='?',
                        choices=PERTURBATIONS.keys(),
                        help='Perturbation function used to transform BabyLM dataset')
    parser.add_argument('train_set',
                        default='all',
                        const='all',
                        nargs='?',
                        choices=["100M", "10M"],
                        help='BabyLM train set')
    parser.add_argument('random_seed', type=int, help="Random seed")
    parser.add_argument('paren_model',
                        default='all',
                        const='all',
                        nargs='?',
                        choices=list(PAREN_MODELS.keys()) + ["randinit"],
                        help='Parenthesis model')
    parser.add_argument('-np', '--no_pos_encodings', action='store_true',
                        help="Train GPT-2 with no positional encodings")

    # Get args
    args = parser.parse_args()

    # Only perturbations whose name contains "hop" are accepted.
    # The message must be an f-string so the offending value is shown.
    if "hop" not in args.perturbation_type:
        raise Exception(
            f"'{args.perturbation_type}' is not a valid hop perturbation")

    # Get examples to run interventions: each cell is a whitespace-separated
    # sequence of integer token ids.
    data_df = pd.read_csv("hop_agreement_data.csv")
    bases = [[int(tok) for tok in seq.split()]
             for seq in data_df["Singular"]]
    sources = [[int(tok) for tok in seq.split()]
               for seq in data_df["Plural"]]

    # Only get first three tokens of each example for control model
    if args.perturbation_type == "hop_control":
        bases = [row[:3] for row in bases]
        sources = [row[:3] for row in sources]

    # Get model and run intervention experiments.
    # `device` is a module-level global read by get_model/run_interventions.
    device = "cuda"
    ckpt_dfs = []
    for ckpt in CHECKPOINTS:
        print(f"Checkpoint: {ckpt}")
        model = get_model(args.perturbation_type, args.train_set,
                          args.random_seed, args.paren_model, ckpt,
                          args.no_pos_encodings)
        ckpt_df = run_interventions(model, bases, sources)
        ckpt_df["ckpt"] = ckpt
        ckpt_dfs.append(ckpt_df)

    # Concatenate once at the end instead of re-copying the growing frame
    # on every checkpoint.
    result_df = pd.concat(ckpt_dfs, axis=0)

    # Create directory for results (no error if it already exists)
    nps = '_no_pos_encodings' if args.no_pos_encodings else ''
    result_directory = f"hop_intervention_results/{args.perturbation_type}_{args.train_set}{nps}/"
    os.makedirs(result_directory, exist_ok=True)

    # Write results
    result_df.to_csv(result_directory + f"{args.paren_model}_seed{args.random_seed}.csv", index=False)