Spaces:
Running
Running
Update my_model/LLAMA2/LLAMA2_model.py
Browse files- my_model/LLAMA2/LLAMA2_model.py +22 -11
my_model/LLAMA2/LLAMA2_model.py
CHANGED
@@ -3,7 +3,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
|
3 |
from typing import Optional
|
4 |
import bitsandbytes # only for using on GPU
|
5 |
import accelerate # only for using on GPU
|
6 |
-
from my_model.config import LLAMA2_config as config
|
7 |
import warnings
|
8 |
|
9 |
# Suppress only FutureWarning from transformers
|
@@ -32,6 +32,7 @@ class Llama2ModelManager:
|
|
32 |
"""
|
33 |
Initializes the Llama2ModelManager class with configuration settings.
|
34 |
"""
|
|
|
35 |
self.device: str = config.DEVICE
|
36 |
self.model_name: str = config.MODEL_NAME
|
37 |
self.tokenizer_name: str = config.TOKENIZER_NAME
|
@@ -51,6 +52,7 @@ class Llama2ModelManager:
|
|
51 |
Returns:
|
52 |
BitsAndBytesConfig: Configuration for BitsAndBytes optimized model.
|
53 |
"""
|
|
|
54 |
if self.quantization == '4bit':
|
55 |
return BitsAndBytesConfig(
|
56 |
load_in_4bit=True,
|
@@ -68,11 +70,13 @@ class Llama2ModelManager:
|
|
68 |
|
69 |
def load_model(self) -> AutoModelForCausalLM:
|
70 |
"""
|
71 |
-
Loads the LLaMA-2 model based on the specified configuration.
|
|
|
72 |
|
73 |
Returns:
|
74 |
AutoModelForCausalLM: Loaded LLaMA-2 model.
|
75 |
"""
|
|
|
76 |
if self.model is not None:
|
77 |
print("Model is already loaded.")
|
78 |
return self.model
|
@@ -99,6 +103,7 @@ class Llama2ModelManager:
|
|
99 |
Returns:
|
100 |
AutoTokenizer: Loaded tokenizer for LLaMA-2 model.
|
101 |
"""
|
|
|
102 |
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=self.use_fast,
|
103 |
token=self.access_token,
|
104 |
trust_remote_code=self.trust_remote,
|
@@ -111,12 +116,17 @@ class Llama2ModelManager:
|
|
111 |
|
112 |
return self.tokenizer
|
113 |
|
114 |
-
def load_model_and_tokenizer(self, for_fine_tuning):
|
115 |
"""
|
116 |
-
Loads
|
117 |
-
|
118 |
-
:
|
|
|
|
|
|
|
|
|
119 |
"""
|
|
|
120 |
if for_fine_tuning:
|
121 |
self.tokenizer = self.load_tokenizer()
|
122 |
self.model = self.load_model()
|
@@ -128,17 +138,17 @@ class Llama2ModelManager:
|
|
128 |
return self.model, self.tokenizer
|
129 |
|
130 |
|
131 |
-
def add_special_tokens(self, tokens: Optional[
|
132 |
"""
|
133 |
-
Adds special tokens to the tokenizer and updates the model's token embeddings if the model is loaded
|
134 |
-
only if the tokenizer is loaded.
|
135 |
|
136 |
Args:
|
137 |
-
tokens (
|
138 |
|
139 |
Returns:
|
140 |
None
|
141 |
"""
|
|
|
142 |
if self.tokenizer is None:
|
143 |
print("Tokenizer is not loaded. Cannot add special tokens.")
|
144 |
return
|
@@ -166,7 +176,8 @@ class Llama2ModelManager:
|
|
166 |
|
167 |
|
168 |
if __name__ == "__main__":
|
169 |
-
pass
|
|
|
170 |
LLAMA2_manager = Llama2ModelManager()
|
171 |
LLAMA2_model = LLAMA2_manager.load_model() # First time loading the model
|
172 |
LLAMA2_tokenizer = LLAMA2_manager.load_tokenizer()
|
|
|
3 |
from typing import Optional
|
4 |
import bitsandbytes # only for using on GPU
|
5 |
import accelerate # only for using on GPU
|
6 |
+
from my_model.config import LLAMA2_config as config
|
7 |
import warnings
|
8 |
|
9 |
# Suppress only FutureWarning from transformers
|
|
|
32 |
"""
|
33 |
Initializes the Llama2ModelManager class with configuration settings.
|
34 |
"""
|
35 |
+
|
36 |
self.device: str = config.DEVICE
|
37 |
self.model_name: str = config.MODEL_NAME
|
38 |
self.tokenizer_name: str = config.TOKENIZER_NAME
|
|
|
52 |
Returns:
|
53 |
BitsAndBytesConfig: Configuration for BitsAndBytes optimized model.
|
54 |
"""
|
55 |
+
|
56 |
if self.quantization == '4bit':
|
57 |
return BitsAndBytesConfig(
|
58 |
load_in_4bit=True,
|
|
|
70 |
|
71 |
def load_model(self) -> AutoModelForCausalLM:
|
72 |
"""
|
73 |
+
Loads the LLaMA-2 model based on the specified configuration.
|
74 |
+
If the model is already loaded, returns the existing model.
|
75 |
|
76 |
Returns:
|
77 |
AutoModelForCausalLM: Loaded LLaMA-2 model.
|
78 |
"""
|
79 |
+
|
80 |
if self.model is not None:
|
81 |
print("Model is already loaded.")
|
82 |
return self.model
|
|
|
103 |
Returns:
|
104 |
AutoTokenizer: Loaded tokenizer for LLaMA-2 model.
|
105 |
"""
|
106 |
+
|
107 |
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=self.use_fast,
|
108 |
token=self.access_token,
|
109 |
trust_remote_code=self.trust_remote,
|
|
|
116 |
|
117 |
return self.tokenizer
|
118 |
|
119 |
+
def load_model_and_tokenizer(self, for_fine_tuning: bool) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
|
120 |
"""
|
121 |
+
Loads the LLaMA-2 model and tokenizer, and optionally adds special tokens for fine-tuning.
|
122 |
+
|
123 |
+
Args:
|
124 |
+
for_fine_tuning (bool): Whether to prepare the model and tokenizer for fine-tuning.
|
125 |
+
|
126 |
+
Returns:
|
127 |
+
Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer.
|
128 |
"""
|
129 |
+
|
130 |
if for_fine_tuning:
|
131 |
self.tokenizer = self.load_tokenizer()
|
132 |
self.model = self.load_model()
|
|
|
138 |
return self.model, self.tokenizer
|
139 |
|
140 |
|
141 |
+
def add_special_tokens(self, tokens: Optional[List[str]] = None) -> None:
|
142 |
"""
|
143 |
+
Adds special tokens to the tokenizer and updates the model's token embeddings if the model is loaded.
|
|
|
144 |
|
145 |
Args:
|
146 |
+
tokens (Optional[List[str]]): Special tokens to add. Defaults to a predefined set.
|
147 |
|
148 |
Returns:
|
149 |
None
|
150 |
"""
|
151 |
+
|
152 |
if self.tokenizer is None:
|
153 |
print("Tokenizer is not loaded. Cannot add special tokens.")
|
154 |
return
|
|
|
176 |
|
177 |
|
178 |
if __name__ == "__main__":
|
179 |
+
pass # uncomment to load the model and tokenizer and add the designed special tokens.
|
180 |
+
|
181 |
LLAMA2_manager = Llama2ModelManager()
|
182 |
LLAMA2_model = LLAMA2_manager.load_model() # First time loading the model
|
183 |
LLAMA2_tokenizer = LLAMA2_manager.load_tokenizer()
|