KB-VQA

Sleeping

App Files Files Community

KB-VQA / my_model /LLAMA2 /LLAMA2_model.py

m7mdal7aj

Update my_model/LLAMA2/LLAMA2_model.py

f9236c8 verified 9 months ago

raw

history blame

7.35 kB

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
	from typing import Optional
	import bitsandbytes # only for using on GPU
	import accelerate # only for using on GPU
	from my_model.config import LLAMA2_config as config # Importing LLAMA2 configuration file
	import warnings

	# Suppress only FutureWarning from transformers
	warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")


	class Llama2ModelManager:
	"""
	Manages loading and configuring the LLaMA-2 model and tokenizer.

	Attributes:
	device (str): Device to use for the model ('cuda' or 'cpu').
	model_name (str): Name or path of the pre-trained model.
	tokenizer_name (str): Name or path of the tokenizer.
	quantization (str): Specifies the quantization level ('4bit', '8bit', or None).
	from_saved (bool): Flag to load the model from a saved path.
	model_path (str or None): Path to the saved model if `from_saved` is True.
	trust_remote (bool): Whether to trust remote code when loading the tokenizer.
	use_fast (bool): Whether to use the fast version of the tokenizer.
	add_eos_token (bool): Whether to add an EOS token to the tokenizer.
	access_token (str): Access token for Hugging Face Hub.
	model (AutoModelForCausalLM or None): Loaded model, initially None.
	"""

	def __init__(self) -> None:
	"""
	Initializes the Llama2ModelManager class with configuration settings.
	"""
	self.device: str = config.DEVICE
	self.model_name: str = config.MODEL_NAME
	self.tokenizer_name: str = config.TOKENIZER_NAME
	self.quantization: str = config.QUANTIZATION
	self.from_saved: bool = config.FROM_SAVED
	self.model_path: Optional[str] = config.MODEL_PATH
	self.trust_remote: bool = config.TRUST_REMOTE
	self.use_fast: bool = config.USE_FAST
	self.add_eos_token: bool = config.ADD_EOS_TOKEN
	self.access_token: str = config.ACCESS_TOKEN
	self.model: Optional[AutoModelForCausalLM] = None

	def create_bnb_config(self) -> BitsAndBytesConfig:
	"""
	Creates a BitsAndBytes configuration based on the quantization setting.

	Returns:
	BitsAndBytesConfig: Configuration for BitsAndBytes optimized model.
	"""
	if self.quantization == '4bit':
	return BitsAndBytesConfig(
	load_in_4bit=True,
	bnb_4bit_use_double_quant=True,
	bnb_4bit_quant_type="nf4",
	bnb_4bit_compute_dtype=torch.bfloat16
	)
	elif self.quantization == '8bit':
	return BitsAndBytesConfig(
	load_in_8bit=True,
	bnb_8bit_use_double_quant=True,
	bnb_8bit_quant_type="nf4",
	bnb_8bit_compute_dtype=torch.bfloat16
	)

	def load_model(self) -> AutoModelForCausalLM:
	"""
	Loads the LLaMA-2 model based on the specified configuration. If the model is already loaded, returns the existing model.

	Returns:
	AutoModelForCausalLM: Loaded LLaMA-2 model.
	"""
	if self.model is not None:
	print("Model is already loaded.")
	return self.model

	if self.from_saved:
	self.model = AutoModelForCausalLM.from_pretrained(self.model_path, device_map="auto")
	else:
	bnb_config = None if self.quantization is None else self.create_bnb_config()
	self.model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto",
	quantization_config=bnb_config,
	torch_dtype=torch.float16,
	token=self.access_token)

	if self.model is not None:
	print(f"LLAMA2 Model loaded successfully in {self.quantization} quantization.")
	else:
	print("LLAMA2 Model failed to load.")
	return self.model

	def load_tokenizer(self) -> AutoTokenizer:
	"""
	Loads the tokenizer for the LLaMA-2 model with the specified configuration.

	Returns:
	AutoTokenizer: Loaded tokenizer for LLaMA-2 model.
	"""
	self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=self.use_fast,
	token=self.access_token,
	trust_remote_code=self.trust_remote,
	add_eos_token=self.add_eos_token)

	if self.tokenizer is not None:
	print(f"LLAMA2 Tokenizer loaded successfully.")
	else:
	print("LLAMA2 Tokenizer failed to load.")

	return self.tokenizer

	def load_model_and_tokenizer(self, for_fine_tuning):
	"""
	Loads LLAMa2 model and tokenizer in one method and adds special tokens if the purpose if fine tuning.
	:param for_fine_tuning: YES(True) / NO (False)
	:return: LLAMA2 Model and Tokenizer
	"""
	if for_fine_tuning:
	self.tokenizer = self.load_tokenizer()
	self.model = self.load_model()
	self.add_special_tokens()
	else:
	self.tokenizer = self.load_tokenizer()
	self.model = self.load_model()

	return self.model, self.tokenizer


	def add_special_tokens(self, tokens: Optional[list[str]] = None) -> None:
	"""
	Adds special tokens to the tokenizer and updates the model's token embeddings if the model is loaded,
	only if the tokenizer is loaded.

	Args:
	tokens (list of str, optional): Special tokens to add. Defaults to a predefined set.

	Returns:
	None
	"""
	if self.tokenizer is None:
	print("Tokenizer is not loaded. Cannot add special tokens.")
	return

	if tokens is None:
	tokens = ['[CAP]', '[/CAP]', '[QES]', '[/QES]', '[OBJ]', '[/OBJ]']

	# Update the tokenizer with new tokens
	print(f"Original vocabulary size: {len(self.tokenizer)}")
	print(f"Adding the following tokens: {tokens}")
	self.tokenizer.add_tokens(tokens, special_tokens=True)
	self.tokenizer.add_special_tokens({'pad_token': '<pad>'})
	print(f"Adding Padding Token {self.tokenizer.pad_token}")
	self.tokenizer.padding_side = "right"
	print(f'Padding side: {self.tokenizer.padding_side}')

	# Resize the model token embeddings if the model is loaded
	if self.model is not None:
	self.model.resize_token_embeddings(len(self.tokenizer))
	self.model.config.pad_token_id = self.tokenizer.pad_token_id

	print(f'Updated Vocabulary Size: {len(self.tokenizer)}')
	print(f'Padding Token: {self.tokenizer.pad_token}')
	print(f'Special Tokens: {self.tokenizer.added_tokens_decoder}')


	if __name__ == "__main__":
	pass
	LLAMA2_manager = Llama2ModelManager()
	LLAMA2_model = LLAMA2_manager.load_model() # First time loading the model
	LLAMA2_tokenizer = LLAMA2_manager.load_tokenizer()
	LLAMA2_manager.add_special_tokens(LLAMA2_model, LLAMA2_tokenizer)