"""Score a text with a Hugging Face causal language model.

The module loads a causal LM and its tokenizer, runs a single forward pass
over (at most) the first ``chunk_size`` tokens of the input text, and reports
the per-token next-token cross-entropy together with its sum.

Only ``model_type == 'hf'`` is supported; the earlier RWKV / Mamba code paths
were dead (fully commented out) and have been removed.
"""
import argparse
import gc
import json
import os
import pdb
from datetime import datetime
from types import SimpleNamespace

import torch
import torch.nn.functional as F

try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
except ImportError:
    # The tensor-only helpers below stay importable without transformers;
    # load_hf_model() raises a clear ImportError when it is really needed.
    AutoModelForCausalLM = None
    AutoTokenizer = None

# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'

# Model used when a caller supplies no (usable) model name.
DEFAULT_HUB_MODEL = 'microsoft/phi-2'
# NOTE(review): machine-specific checkpoint path used when run_get_loss() is
# called without arguments -- kept for backward compatibility, confirm it is
# still wanted.
DEFAULT_LOCAL_MODEL = '/home/sribd/chenghao/models/phi-2'
DEFAULT_CHUNK_SIZE = 1024


def load_list_from_json(file_path):
    """
    Loads a list of strings from a JSON file.

    :param file_path: Path of the JSON file to be loaded.
    :return: List of strings loaded from the JSON file.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def calculate_loss(logits, target_token_ids):
    """
    Computes the per-position next-token cross-entropy.

    Position ``i`` of ``logits`` is scored against token ``i + 1`` of
    ``target_token_ids``, so the result has one entry fewer than the input.

    :param logits: 2-D tensor indexed as ``[position, vocabulary]``.
    :param target_token_ids: 1-D tensor with the token id of every position.
    :return: NumPy array holding the unreduced loss of each scored position.
    """
    vocab_size = logits.shape[-1]
    # reshape (not view) so non-contiguous logits are accepted as well.
    loss = F.cross_entropy(
        logits[:-1, :].reshape(-1, vocab_size),
        target_token_ids[1:].reshape(-1),
        reduction='none',
    )
    # detach so the conversion also works for tensors that track gradients.
    return loss.detach().cpu().numpy()


def calculate_log_sum(logits, target_token_ids):
    """
    Computes the summed negative log-probability of the next tokens.

    :param logits: 2-D tensor indexed as ``[position, vocabulary]``.
    :param target_token_ids: 1-D tensor with the token id of every position.
    :return: Python float, the sum over positions of ``-log p(next token)``.
    """
    shifted_logits = logits[:-1, :]
    shifted_targets = target_token_ids[1:]
    log_probs = F.log_softmax(shifted_logits, dim=-1)
    # Squeeze only the gathered column so a single target stays 1-D.
    target_nll = -log_probs.gather(1, shifted_targets.unsqueeze(1)).squeeze(1)
    return target_nll.sum().item()


def print_model_parameters_in_billions(model):
    """
    Prints the total number of parameters of ``model`` in billions.

    :param model: Object exposing ``parameters()`` that yields tensors.
    """
    total_params = sum(p.numel() for p in model.parameters())
    total_params_billion = total_params / 1e9
    print(f"Model parameters: {total_params_billion:.3f} billion")


def load_hf_model(path, cache_path):
    """
    Loads a Hugging Face causal LM (in eval mode) and its tokenizer.

    :param path: Model name or path passed to ``from_pretrained``.
    :param cache_path: Cache directory for the model weights, or ``None`` to
        use the library default.
    :return: Tuple ``(model, tokenizer)``.
    :raises ImportError: If the ``transformers`` package is not installed.
    """
    if AutoTokenizer is None or AutoModelForCausalLM is None:
        raise ImportError(
            "the 'transformers' package is required to load Hugging Face models"
        )
    hf_tokenizer = AutoTokenizer.from_pretrained(path)
    # SECURITY: trust_remote_code=True executes code shipped with the model
    # repository -- only load checkpoints from sources you trust.
    # cache_dir=None is the library default, so one call covers both cases.
    hf_model = AutoModelForCausalLM.from_pretrained(
        path,
        device_map=device,
        trust_remote_code=True,
        cache_dir=cache_path,
    ).eval()
    print_model_parameters_in_billions(hf_model)
    return hf_model, hf_tokenizer


def eval_hf_model(model, tokenizer, texts, chunk_size):
    """
    Scores the first ``chunk_size`` tokens of ``texts`` with ``model``.

    Only the first chunk is evaluated (tokens beyond ``chunk_size`` are
    ignored) and only the first row of the tokenized batch is scored.

    :param model: Causal LM whose call returns an object with ``.logits``.
    :param tokenizer: Tokenizer called as ``tokenizer(texts, return_tensors='pt')``.
    :param texts: Text to score.
    :param chunk_size: Maximum number of tokens fed to the model.
    :return: Dict with keys ``logit``, ``input_ids``, ``loss``, ``tokenizer``
        and ``neg_log_prob_temp``.
    """
    with torch.no_grad():
        inputs = tokenizer(texts, return_tensors='pt').to(model.device)
        input_chunk = inputs['input_ids'][:, :chunk_size]
        logit = model(input_ids=input_chunk).logits[0, :, :]
        # Row 0 of the ids, matching row 0 of the logits taken above.
        token_ids = input_chunk[0]
        neg_log_prob_temp = calculate_log_sum(logit, token_ids)
        loss = calculate_loss(logit, token_ids)

    return {
        'logit': logit.cpu().numpy(),
        'input_ids': input_chunk.cpu().numpy()[0],
        'loss': loss,
        'tokenizer': tokenizer,
        'neg_log_prob_temp': neg_log_prob_temp,
    }


def run_get_loss(args=None):
    """
    Loads the requested model and scores ``args.texts`` with it.

    Missing optional attributes (``chunk_size``, ``model_type``, ``model``,
    ``model_cache``) are filled in on ``args`` itself.

    :param args: Namespace-like object with at least a ``texts`` attribute;
        ``None`` selects a built-in demo configuration.
    :return: The dict produced by :func:`eval_hf_model`.
    :raises NotImplementedError: If ``args.model_type`` is not ``'hf'``.
    """
    if args is None:
        args = SimpleNamespace(
            model=DEFAULT_LOCAL_MODEL,
            texts='Hello FreshBench !',
            model_type='hf',
            model_cache=None,
            chunk_size=DEFAULT_CHUNK_SIZE,
        )

    if not hasattr(args, 'chunk_size'):
        args.chunk_size = DEFAULT_CHUNK_SIZE
    if not hasattr(args, 'model_type'):
        args.model_type = 'hf'
    # A missing, None or too-short model name falls back to the hub default.
    model_name = getattr(args, 'model', None)
    if model_name is None or len(model_name) < 2:
        args.model = DEFAULT_HUB_MODEL
    if not hasattr(args, 'model_cache'):
        args.model_cache = None

    print('args', args)
    texts = args.texts
    print(f'data size: {len(texts)}')

    if args.model_type != 'hf':
        raise NotImplementedError(
            f'unsupported model_type: {args.model_type!r}'
        )

    # load model
    model, tokenizer = load_hf_model(args.model, args.model_cache)

    # eval
    print('eval hf')
    return eval_hf_model(
        model=model,
        tokenizer=tokenizer,
        texts=texts,
        chunk_size=args.chunk_size,
    )


if __name__ == '__main__':
    args = SimpleNamespace(
        model='microsoft/phi-2',
        texts='Hello FreshBench !',
        model_type='hf',
        model_cache=None,
        chunk_size=1024,
    )
    run_get_loss(args)