Update README.md
README.md (CHANGED)

@@ -38,11 +38,12 @@ We evaluate GRM on the [reward model benchmark](https://huggingface.co/spaces/al
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
+device = 'cuda:2'
 # load model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained('Ray2333/GRM-llama3-8B-sftreg')
 reward_model = AutoModelForSequenceClassification.from_pretrained(
     'Ray2333/GRM-llama3-8B-sftreg', torch_dtype=torch.float16, trust_remote_code=True,
-    device_map=
+    device_map=device,
 )
 message = [
     {'role': 'user', 'content': "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?"},
@@ -55,8 +56,9 @@ kwargs = {"padding": 'max_length', "truncation": True, "return_tensors": "pt"}
 tokens = tokenizer.encode_plus(message_template, **kwargs)
 
 with torch.no_grad():
-    _, _, reward_tensor =
+    _, _, reward_tensor = reward_model(tokens["input_ids"][0].view(1,-1).to(device), attention_mask=tokens["attention_mask"][0].view(1,-1).to(device))
 reward = reward_tensor.cpu().detach().item()
+
 ```
 
 
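For readers who want to try the updated example end to end, here is a minimal consolidated sketch of the README snippet with the `device` change applied. The `apply_chat_template` call that produces `message_template` and the assistant reply being scored are assumptions (those lines fall outside the hunks shown above); everything else mirrors the diffed lines.

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

device = 'cuda:2'  # any single CUDA device works, e.g. 'cuda:0'

# load model and tokenizer (custom GRM head is loaded via trust_remote_code)
tokenizer = AutoTokenizer.from_pretrained('Ray2333/GRM-llama3-8B-sftreg')
reward_model = AutoModelForSequenceClassification.from_pretrained(
    'Ray2333/GRM-llama3-8B-sftreg', torch_dtype=torch.float16, trust_remote_code=True,
    device_map=device,
)

message = [
    {'role': 'user', 'content': "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?"},
    # Hypothetical assistant reply to be scored; replace with the response you want to evaluate.
    {'role': 'assistant', 'content': "Sorry, I can't help with that. Misleading your daughter about who she is chatting with would be deceptive."},
]

# Assumed step (not shown in the diff): render the conversation with the chat template.
message_template = tokenizer.apply_chat_template(message, tokenize=False)

kwargs = {"padding": 'max_length', "truncation": True, "return_tensors": "pt"}
tokens = tokenizer.encode_plus(message_template, **kwargs)

with torch.no_grad():
    # The custom model head returns several outputs; the reward tensor is the third,
    # as in the README. Inputs are moved to the same device the weights were placed on.
    _, _, reward_tensor = reward_model(
        tokens["input_ids"][0].view(1, -1).to(device),
        attention_mask=tokens["attention_mask"][0].view(1, -1).to(device),
    )
reward = reward_tensor.cpu().detach().item()
print(reward)
```

Because `device_map=device` places all of the model's weights on a single GPU, the input tensors must be moved to that same device before the forward pass, which is what the `.to(device)` calls in the added line do.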