Weirdly, this model got trained fully without any issue. I trained the model and loaded the adapters successfully. But the moment I try to infer from it using loaded_model(**tokenizer("hello", return_tensors = "pt").to("cuda:0")), it throws some error:

NOTE: When I use EXACTLY same Adapters on Base 3.1-8B model, it runs smoothly as expected like always

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 model(**tokenizer("hello", return_tensors = "pt").to("cuda:0"))

File ~/anaconda3/envs/train_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File ~/anaconda3/envs/train_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File ~/anaconda3/envs/train_env/lib/python3.10/site-packages/peft/peft_model.py:902, in PeftModelForSequenceClassification.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)
    900     if peft_config.peft_type == PeftType.POLY:
    901         kwargs["task_ids"] = task_ids
--> 902     return self.base_model(
    903         input_ids=input_ids,
    904         attention_mask=attention_mask,
    905         inputs_embeds=inputs_embeds,
    906         labels=labels,
    907         output_attentions=output_attentions,
    908         output_hidden_states=output_hidden_states,
    909         return_dict=return_dict,
    910         **kwargs,
    911     )
    913 batch_size = _get_batch_size(input_ids, inputs_embeds)
    914 if attention_mask is not None:
    915     # concat prompt attention mask

File ~/anaconda3/envs/train_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
   1516     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517 else:
-> 1518     return self._call_impl(*args, **kwargs)

File ~/anaconda3/envs/train_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
   1522 # If we don't have any hooks, we want to skip the rest of the logic in
   1523 # this function, and just call forward.
   1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1525         or _global_backward_pre_hooks or _global_backward_hooks
   1526         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527     return forward_call(*args, **kwargs)
   1529 try:
   1530     result = None

File ~/anaconda3/envs/train_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:160, in BaseTuner.forward(self, *args, **kwargs)
    159 def forward(self, *args: Any, **kwargs: Any):
--> 160     return self.model.forward(*args, **kwargs)

File ~/anaconda3/envs/train_env/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:1312, in LlamaForSequenceClassification.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
   1309 else:
   1310     if input_ids is not None:
   1311         # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
-> 1312         sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
   1313         sequence_lengths = sequence_lengths % input_ids.shape[-1]
   1314         sequence_lengths = sequence_lengths.to(logits.device)

TypeError: eq() received an invalid combination of arguments - got (Tensor, list), but expected one of:
 * (Tensor input, Tensor other, *, Tensor out)
 * (Tensor input, Number other, *, Tensor out)

Here is the code I use to load the model:

def load_model(lora_path, device, num_labels, merge_unload = False):
    
    model = AutoModelForSequenceClassification.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct",
                                                          token= "hf_..........",
                                                          trust_remote_code=True, 
                                                          device_map = device, 
                                                          torch_dtype=torch.bfloat16,
                                                          # attn_implementation = "flash_attention_2", 
                                                          num_labels = num_labels)
    
    if model.config.pad_token_id is None:
        model.config.pad_token_id = model.config.eos_token_id
    
    peft_model = PeftModel.from_pretrained(model,
                                           lora_path, 
                                           device_map = device)
    
    peft_model = peft_model.eval().to(device)
    if merge_unload: peft_model = peft_model.merge_and_unload()
    
    return peft_model

meta-llama
/

Llama-3.1-8B-Instruct

Throwing Error for AutoModelForSequence Classification

Here is the code I use to load the model: