m7mdal7aj committed on
Commit
9485a43
·
verified ·
1 Parent(s): 6740cd3

Update my_model/fine_tuner/fine_tuner.py

Browse files
Files changed (1) hide show
  1. my_model/fine_tuner/fine_tuner.py +49 -16
my_model/fine_tuner/fine_tuner.py CHANGED
@@ -96,13 +96,12 @@ class Finetuner:
96
  - print_trainable_parameters: Prints the number of trainable parameters in the model.
97
  """
98
 
 
99
  def __init__(self, train_dataset: Dataset, eval_dataset: Dataset) -> None:
100
  """
101
  Initializes the Finetuner class with the model, tokenizer, and datasets.
102
 
103
  Args:
104
- model (AutoModelForCausalLM): The pre-trained language model.
105
- tokenizer (AutoTokenizer): The tokenizer for the model.
106
  train_dataset (Dataset): The dataset for training the model.
107
  eval_dataset (Dataset): The dataset for evaluating the model.
108
  """
@@ -111,7 +110,7 @@ class Finetuner:
111
  self.merged_model = None
112
  self.train_dataset = train_dataset
113
  self.eval_dataset = eval_dataset
114
- # please refer to config file 'fine_tuning_config.py' for training arguments description.
115
  self.training_arguments = TrainingArguments(
116
  output_dir=config.OUTPUT_DIR,
117
  num_train_epochs=config.NUM_TRAIN_EPOCHS,
@@ -135,10 +134,9 @@ class Finetuner:
135
  report_to="tensorboard"
136
  )
137
 
138
- def load_LLAMA2_for_finetuning(self):
139
  """
140
  Loads the LLAMA2 model and tokenizer, specifically configured for fine-tuning.
141
- This method ensures the model is ready to be adapted to a specific task or dataset.
142
 
143
  Returns:
144
  Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer.
@@ -148,6 +146,7 @@ class Finetuner:
148
  base_model, tokenizer = llm_manager.load_model_and_tokenizer(for_fine_tuning=True)
149
 
150
  return base_model, tokenizer
 
151
 
152
  def find_all_linear_names(self) -> List[str]:
153
  """
@@ -156,6 +155,7 @@ class Finetuner:
156
  Returns:
157
  List[str]: A list of linear layer names.
158
  """
 
159
  cls = bitsandbytes.nn.Linear4bit
160
  lora_module_names = set()
161
  for name, module in self.base_model.named_modules():
@@ -167,12 +167,16 @@ class Finetuner:
167
  lora_module_names -= {'lm_head', 'gate_proj'}
168
  return list(lora_module_names)
169
 
 
170
  def print_trainable_parameters(self, use_4bit: bool = False) -> None:
171
  """
172
  Calculates and prints the number of trainable parameters in the model.
173
 
174
  Args:
175
  use_4bit (bool): If true, calculates the parameter count considering 4-bit quantization.
 
 
 
176
  """
177
  trainable_params = sum(p.numel() for p in self.base_model.parameters() if p.requires_grad)
178
  if use_4bit:
@@ -188,6 +192,9 @@ class Finetuner:
188
 
189
  Args:
190
  peft_config (LoraConfig): Configuration for the PEFT training process.
 
 
 
191
  """
192
  self.base_model.config.use_cache = False
193
  # Set the pretraining_tp flag to 1 to enable the use of LoRA (Low-Rank Adaptation) layers.
@@ -207,8 +214,7 @@ class Finetuner:
207
  )
208
  self.trainer.train()
209
 
210
- def save_model(self):
211
-
212
  """
213
  Saves the fine-tuned model to the specified directory.
214
 
@@ -218,12 +224,15 @@ class Finetuner:
218
  for later use or evaluation.
219
 
220
  The saved model can be easily loaded using Hugging Face's model loading utilities.
 
 
 
221
  """
222
 
223
  self.fine_tuned_adapter_name = config.ADAPTER_SAVE_NAME
224
  self.trainer.model.save_pretrained(self.fine_tuned_adapter_name)
225
 
226
- def merge_weights(self):
227
  """
228
  Merges the weights of the fine-tuned adapter with the base model.
229
 
@@ -234,18 +243,26 @@ class Finetuner:
234
  After merging, the weights of the adapter are no longer separate from the
235
  base model, enabling more efficient storage and deployment. The merged model
236
  is stored in the 'self.merged_model' attribute of the Finetuner class.
 
 
 
237
  """
238
 
239
  self.merged_model = PeftModel.from_pretrained(self.base_model, self.fine_tuned_adapter_name)
240
  self.merged_model = self.merged_model.merge_and_unload()
 
241
 
242
- def delete_model(self, model_name: str):
243
  """
244
  Deletes a specified model attribute.
245
 
246
  Args:
247
  model_name (str): The name of the model attribute to delete.
 
 
 
248
  """
 
249
  try:
250
  if hasattr(self, model_name) and getattr(self, model_name) is not None:
251
  delattr(self, model_name)
@@ -254,14 +271,19 @@ class Finetuner:
254
  print(f"Warning: Model '{model_name}' has already been cleared or does not exist.")
255
  except Exception as e:
256
  print(f"Error occurred while deleting model '{model_name}': {str(e)}")
 
257
 
258
- def delete_trainer(self, trainer_name: str):
259
  """
260
  Deletes a specified trainer object.
261
 
262
  Args:
263
  trainer_name (str): The name of the trainer object to delete.
 
 
 
264
  """
 
265
  try:
266
  if hasattr(self, trainer_name) and getattr(self, trainer_name) is not None:
267
  delattr(self, trainer_name)
@@ -271,10 +293,15 @@ class Finetuner:
271
  except Exception as e:
272
  print(f"Error occurred while deleting trainer object '{trainer_name}': {str(e)}")
273
 
274
- def clear_training_resources(self):
 
275
  """
276
  Clears GPU memory.
 
 
 
277
  """
 
278
  try:
279
  if torch.cuda.is_available():
280
  torch.cuda.empty_cache()
@@ -282,10 +309,15 @@ class Finetuner:
282
  except Exception as e:
283
  print(f"Error occurred while clearing GPU memory: {str(e)}")
284
 
285
- def clear_cache_and_collect_garbage(self):
 
286
  """
287
  Clears Hugging Face's Transformers cache and runs garbage collection.
 
 
 
288
  """
 
289
  try:
290
  if os.path.exists(TRANSFORMERS_CACHE):
291
  shutil.rmtree(TRANSFORMERS_CACHE, ignore_errors=True)
@@ -296,7 +328,9 @@ class Finetuner:
296
  except Exception as e:
297
  print(f"Error occurred while clearing cache and collecting garbage: {str(e)}")
298
 
299
- def fine_tune(save_fine_tuned_adapter=False, merge=False, delete_trainer_after_fine_tune=False):
 
 
300
  """
301
  Conducts the fine-tuning process of a pre-trained language model using specified configurations.
302
  This function encompasses the complete workflow of fine-tuning, including data handling, training,
@@ -313,9 +347,8 @@ def fine_tune(save_fine_tuned_adapter=False, merge=False, delete_trainer_after_f
313
  delete_trainer_after_fine_tune (bool): If True, deletes the trainer object after fine-tuning to free up resources.
314
 
315
  Returns:
316
- The fine-tuned model after the fine-tuning process. This could be either the merged model
317
- or the trained model based on the provided arguments.
318
-
319
  """
320
 
321
  data_handler = FinetuningDataHandler()
 
96
  - print_trainable_parameters: Prints the number of trainable parameters in the model.
97
  """
98
 
99
+
100
  def __init__(self, train_dataset: Dataset, eval_dataset: Dataset) -> None:
101
  """
102
  Initializes the Finetuner class with the model, tokenizer, and datasets.
103
 
104
  Args:
 
 
105
  train_dataset (Dataset): The dataset for training the model.
106
  eval_dataset (Dataset): The dataset for evaluating the model.
107
  """
 
110
  self.merged_model = None
111
  self.train_dataset = train_dataset
112
  self.eval_dataset = eval_dataset
113
+ # Please refer to the config file 'my_model/config/fine_tuning_config.py' for a description of the training arguments.
114
  self.training_arguments = TrainingArguments(
115
  output_dir=config.OUTPUT_DIR,
116
  num_train_epochs=config.NUM_TRAIN_EPOCHS,
 
134
  report_to="tensorboard"
135
  )
136
 
137
+ def load_LLAMA2_for_finetuning(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
138
  """
139
  Loads the LLAMA2 model and tokenizer, specifically configured for fine-tuning.
 
140
 
141
  Returns:
142
  Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer.
 
146
  base_model, tokenizer = llm_manager.load_model_and_tokenizer(for_fine_tuning=True)
147
 
148
  return base_model, tokenizer
149
+
150
 
151
  def find_all_linear_names(self) -> List[str]:
152
  """
 
155
  Returns:
156
  List[str]: A list of linear layer names.
157
  """
158
+
159
  cls = bitsandbytes.nn.Linear4bit
160
  lora_module_names = set()
161
  for name, module in self.base_model.named_modules():
 
167
  lora_module_names -= {'lm_head', 'gate_proj'}
168
  return list(lora_module_names)
169
 
170
+
171
  def print_trainable_parameters(self, use_4bit: bool = False) -> None:
172
  """
173
  Calculates and prints the number of trainable parameters in the model.
174
 
175
  Args:
176
  use_4bit (bool): If true, calculates the parameter count considering 4-bit quantization.
177
+
178
+ Returns:
179
+ None
180
  """
181
  trainable_params = sum(p.numel() for p in self.base_model.parameters() if p.requires_grad)
182
  if use_4bit:
 
192
 
193
  Args:
194
  peft_config (LoraConfig): Configuration for the PEFT training process.
195
+
196
+ Returns:
197
+ None
198
  """
199
  self.base_model.config.use_cache = False
200
  # Set the pretraining_tp flag to 1 to enable the use of LoRA (Low-Rank Adaptation) layers.
 
214
  )
215
  self.trainer.train()
216
 
217
+ def save_model(self) -> None:
 
218
  """
219
  Saves the fine-tuned model to the specified directory.
220
 
 
224
  for later use or evaluation.
225
 
226
  The saved model can be easily loaded using Hugging Face's model loading utilities.
227
+
228
+ Returns:
229
+ None
230
  """
231
 
232
  self.fine_tuned_adapter_name = config.ADAPTER_SAVE_NAME
233
  self.trainer.model.save_pretrained(self.fine_tuned_adapter_name)
234
 
235
+ def merge_weights(self) -> None:
236
  """
237
  Merges the weights of the fine-tuned adapter with the base model.
238
 
 
243
  After merging, the weights of the adapter are no longer separate from the
244
  base model, enabling more efficient storage and deployment. The merged model
245
  is stored in the 'self.merged_model' attribute of the Finetuner class.
246
+
247
+ Returns:
248
+ None
249
  """
250
 
251
  self.merged_model = PeftModel.from_pretrained(self.base_model, self.fine_tuned_adapter_name)
252
  self.merged_model = self.merged_model.merge_and_unload()
253
+
254
 
255
+ def delete_model(self, model_name: str) -> None:
256
  """
257
  Deletes a specified model attribute.
258
 
259
  Args:
260
  model_name (str): The name of the model attribute to delete.
261
+
262
+ Returns:
263
+ None
264
  """
265
+
266
  try:
267
  if hasattr(self, model_name) and getattr(self, model_name) is not None:
268
  delattr(self, model_name)
 
271
  print(f"Warning: Model '{model_name}' has already been cleared or does not exist.")
272
  except Exception as e:
273
  print(f"Error occurred while deleting model '{model_name}': {str(e)}")
274
+
275
 
276
+ def delete_trainer(self, trainer_name: str) -> None:
277
  """
278
  Deletes a specified trainer object.
279
 
280
  Args:
281
  trainer_name (str): The name of the trainer object to delete.
282
+
283
+ Returns:
284
+ None
285
  """
286
+
287
  try:
288
  if hasattr(self, trainer_name) and getattr(self, trainer_name) is not None:
289
  delattr(self, trainer_name)
 
293
  except Exception as e:
294
  print(f"Error occurred while deleting trainer object '{trainer_name}': {str(e)}")
295
 
296
+
297
+ def clear_training_resources(self) -> None:
298
  """
299
  Clears GPU memory.
300
+
301
+ Returns:
302
+ None
303
  """
304
+
305
  try:
306
  if torch.cuda.is_available():
307
  torch.cuda.empty_cache()
 
309
  except Exception as e:
310
  print(f"Error occurred while clearing GPU memory: {str(e)}")
311
 
312
+
313
+ def clear_cache_and_collect_garbage(self) -> None:
314
  """
315
  Clears Hugging Face's Transformers cache and runs garbage collection.
316
+
317
+ Returns:
318
+ None
319
  """
320
+
321
  try:
322
  if os.path.exists(TRANSFORMERS_CACHE):
323
  shutil.rmtree(TRANSFORMERS_CACHE, ignore_errors=True)
 
328
  except Exception as e:
329
  print(f"Error occurred while clearing cache and collecting garbage: {str(e)}")
330
 
331
+
332
+
333
+ def fine_tune(save_fine_tuned_adapter: bool = False, merge: bool = False, delete_trainer_after_fine_tune: bool = False) -> AutoModelForCausalLM:
334
  """
335
  Conducts the fine-tuning process of a pre-trained language model using specified configurations.
336
  This function encompasses the complete workflow of fine-tuning, including data handling, training,
 
347
  delete_trainer_after_fine_tune (bool): If True, deletes the trainer object after fine-tuning to free up resources.
348
 
349
  Returns:
350
+ AutoModelForCausalLM: The fine-tuned model after the fine-tuning process. This could be either the merged model
351
+ or the trained model based on the provided arguments.
 
352
  """
353
 
354
  data_handler = FinetuningDataHandler()