# Modified from the Hugging Face TRL package's AutoModelForCausalLMWithValueHead class.
# Enables better customization of the value head for generalizable reward modeling.

import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM
from trl import PreTrainedModelWrapper


class ValueHead(nn.Module):
    r"""
    Value head for reward modeling: maps the last hidden state of a causal LM
    to a scalar score, using either a single linear layer or a small MLP.
    """

    def __init__(self, config, **kwargs):
        super().__init__()
        if not hasattr(config, "summary_dropout_prob"):
            summary_dropout_prob = kwargs.pop("summary_dropout_prob", 0.1)
        else:
            summary_dropout_prob = config.summary_dropout_prob

        self.dropout = nn.Dropout(summary_dropout_prob) if summary_dropout_prob else nn.Identity()

        # Some models such as OPT have a projection layer before the word embeddings, e.g. OPT-350m.
        if hasattr(config, "hidden_size"):
            hidden_size = config.hidden_size
        if hasattr(config, "word_embed_proj_dim"):
            hidden_size = config.word_embed_proj_dim
        elif hasattr(config, "is_encoder_decoder"):
            if config.is_encoder_decoder and hasattr(config, "decoder"):
                if hasattr(config.decoder, "hidden_size"):
                    hidden_size = config.decoder.hidden_size

        # Value head configuration: values stored in the config (e.g. loaded from JSON)
        # take precedence over keyword arguments.
        if hasattr(config, "vhead_layer_type"):
            self.layer_type = config.vhead_layer_type
        else:
            self.layer_type = kwargs.pop("vhead_layer_type", "mlp")
        if hasattr(config, "vhead_num_neurons"):
            num_neurons = config.vhead_num_neurons
        else:
            num_neurons = kwargs.pop("vhead_num_neurons", 1024)
        if hasattr(config, "vhead_num_layers"):
            num_layers = config.vhead_num_layers
        else:
            num_layers = kwargs.pop("vhead_num_layers", 1)

        if self.layer_type == "linear":
            self.summary = nn.Linear(hidden_size, 1)
        else:
            module_list = []
            input_neurons = hidden_size
            for _ in range(num_layers):
                module_list.extend([nn.Linear(input_neurons, num_neurons), nn.ReLU()])
                input_neurons = num_neurons
            module_list.append(nn.Linear(num_neurons, 1))
            self.summary = nn.Sequential(*module_list)
        self.flatten = nn.Flatten()

    def forward(self, hidden_states):
        output = self.dropout(hidden_states)

        # Cast the hidden states to the dtype of the value head if needed
        # (e.g. when the base model runs in half precision).
        if self.layer_type == "linear" and output.dtype != self.summary.weight.dtype:
            output = output.to(self.summary.weight.dtype)
        elif self.layer_type != "linear" and output.dtype != self.summary[0].weight.dtype:
            output = output.to(self.summary[0].weight.dtype)

        output = self.summary(output)
        return output


class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper):
    r"""
    Causal LM wrapper with a customizable value head for reward modeling: the
    forward pass returns the LM logits, the LM loss (if any), and one scalar
    value per sequence taken at the last non-padding token.
    """

    transformers_parent_class = AutoModelForCausalLM
    lm_head_namings = ["lm_head", "embed_out"]
    supported_args = (
        "summary_dropout_prob",
        "v_head_initializer_range",
        "v_head_init_strategy",
        "layer_type",
        "num_neurons",
        "num_layers",
    )

    def __init__(self, pretrained_model, **kwargs):
        r"""
        Initializes the model and attaches a value head on top of the language model.
        """
        super().__init__(pretrained_model, **kwargs)
        v_head_kwargs, _, _ = self._split_kwargs(kwargs)

        if not any(hasattr(self.pretrained_model, attribute) for attribute in self.lm_head_namings):
            raise ValueError("The model does not have a language model head, please use a model that has one.")

        self.v_head = ValueHead(self.pretrained_model.config, **v_head_kwargs)
        self._init_weights(**v_head_kwargs)

    def _init_weights(self, **kwargs):
        r"""
        Initializes the weights of the value head. By default the weights keep
        their random initialization; pass `v_head_init_strategy="normal"` to
        draw them from a normal distribution with std `v_head_initializer_range`.
        """
        initializer_range = kwargs.pop("v_head_initializer_range", 0.2)
        # random init by default
        init_strategy = kwargs.pop("v_head_init_strategy", None)
        if init_strategy is None:
            # do nothing
            pass
        elif init_strategy == "normal":
            self.v_head.summary.weight.data.normal_(mean=0.0, std=initializer_range)
            self.v_head.summary.bias.data.zero_()

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        **kwargs,
    ):
        kwargs["output_hidden_states"] = True  # this had already been set in the LoRA / PEFT examples
        kwargs["past_key_values"] = past_key_values

        if self.is_peft_model and self.pretrained_model.active_peft_config.peft_type == "PREFIX_TUNING":
            kwargs.pop("past_key_values")

        base_model_output = self.pretrained_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **kwargs,
        )

        last_hidden_state = base_model_output.hidden_states[-1]
        lm_logits = base_model_output.logits
        loss = base_model_output.loss

        # Move the hidden states to the device of the value head if they differ
        # (e.g. when the base model is sharded with a device map).
        if hasattr(self.v_head.summary, "weight") and last_hidden_state.device != self.v_head.summary.weight.device:
            last_hidden_state = last_hidden_state.to(self.v_head.summary.weight.device)
        elif not hasattr(self.v_head.summary, "weight") and (
            last_hidden_state.device != self.v_head.summary[0].weight.device
        ):
            last_hidden_state = last_hidden_state.to(self.v_head.summary[0].weight.device)

        # Use the value at the last non-padding token of each sequence as the reward.
        last_index = attention_mask.sum(dim=-1) - 1
        value = self.v_head(last_hidden_state).squeeze(-1)[torch.arange(len(last_hidden_state)), last_index]

        # Force upcast to fp32 if the logits are in half precision.
        if lm_logits.dtype != torch.float32:
            lm_logits = lm_logits.float()

        return (lm_logits, loss, value)

    def generate(self, *args, **kwargs):
        r"""
        Delegates generation to the wrapped language model.
        """
        return self.pretrained_model.generate(*args, **kwargs)

    def state_dict(self, *args, **kwargs):
        r"""
        Returns the state dict of the wrapped model with the value head weights
        added under the `v_head.` prefix.
        """
        pretrained_model_state_dict = self.pretrained_model.state_dict(*args, **kwargs)
        v_head_state_dict = self.v_head.state_dict(*args, **kwargs)
        for k, v in v_head_state_dict.items():
            pretrained_model_state_dict[f"v_head.{k}"] = v
        return pretrained_model_state_dict

    def push_to_hub(self, *args, **kwargs):
        setattr(self.pretrained_model, "v_head", self.v_head)
        return self.pretrained_model.push_to_hub(*args, **kwargs)

    def post_init(self, state_dict):
        r"""
        Loads the value head weights from the state dict (keys prefixed with
        `v_head.`) and, if the base model is sharded across devices, moves the
        value head to the first device and registers a forward hook that moves
        the outputs there as well.
        """
        for k in list(state_dict.keys()):
            if "v_head." in k:
                state_dict[k.replace("v_head.", "")] = state_dict.pop(k)
        self.v_head.load_state_dict(state_dict, strict=False)
        del state_dict

        if hasattr(self.pretrained_model, "hf_device_map"):
            if (
                "cpu" in self.pretrained_model.hf_device_map.values()
                or "disk" in self.pretrained_model.hf_device_map.values()
            ):
                raise ValueError(
                    "The model is offloaded on CPU or disk - CPU & disk offloading is not supported for ValueHead models."
                )

            first_device = list(set(self.pretrained_model.hf_device_map.values()))[0]
            self.v_head = self.v_head.to(first_device)

            def set_device_hook(module, input, outputs):
                new_output = ()
                for output in outputs:
                    if isinstance(output, torch.Tensor):
                        new_output += (output.to(first_device),)
                    else:
                        new_output += (output,)
                return new_output

            self.register_forward_hook(set_device_hook)
            self.is_sequential_parallel = True

    @classmethod
    def register_for_auto_class(cls, auto_class="AutoModel"):
        if not isinstance(auto_class, str):
            auto_class = auto_class.__name__

        import transformers.models.auto as auto_module

        if not hasattr(auto_module, auto_class):
            raise ValueError(f"{auto_class} is not a valid auto class.")

        cls._auto_class = auto_class