Ray2333 committed on
Commit
3a31cf2
1 Parent(s): 3bcb014

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -2
README.md CHANGED
@@ -38,11 +38,12 @@ We evaluate GRM on the [reward model benchmark](https://huggingface.co/spaces/al
38
  import torch
39
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
40
 
 
41
  # load model and tokenizer
42
  tokenizer = AutoTokenizer.from_pretrained('Ray2333/GRM-llama3-8B-sftreg')
43
  reward_model = AutoModelForSequenceClassification.from_pretrained(
44
  'Ray2333/GRM-llama3-8B-sftreg', torch_dtype=torch.float16, trust_remote_code=True,
45
- device_map=0,
46
  )
47
  message = [
48
  {'role': 'user', 'content': "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?"},
@@ -55,8 +56,9 @@ kwargs = {"padding": 'max_length', "truncation": True, "return_tensors": "pt"}
55
  tokens = tokenizer.encode_plus(message_template, **kwargs)
56
 
57
  with torch.no_grad():
58
- _, _, reward_tensor = model(tokens["input_ids"][0].to(model.device), attention_mask=tokens["attention_mask"][0].to(model.device)).logits.reshape(-1)
59
  reward = reward_tensor.cpu().detach().item()
 
60
  ```
61
 
62
 
 
38
  import torch
39
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
40
 
41
+ device = 'cuda:2'
42
  # load model and tokenizer
43
  tokenizer = AutoTokenizer.from_pretrained('Ray2333/GRM-llama3-8B-sftreg')
44
  reward_model = AutoModelForSequenceClassification.from_pretrained(
45
  'Ray2333/GRM-llama3-8B-sftreg', torch_dtype=torch.float16, trust_remote_code=True,
46
+ device_map=device,
47
  )
48
  message = [
49
  {'role': 'user', 'content': "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?"},
 
56
  tokens = tokenizer.encode_plus(message_template, **kwargs)
57
 
58
  with torch.no_grad():
59
+ _, _, reward_tensor = reward_model(tokens["input_ids"][0].view(1,-1).to(device), attention_mask=tokens["attention_mask"][0].view(1,-1).to(device))
60
  reward = reward_tensor.cpu().detach().item()
61
+
62
  ```
63
 
64