---
license: apache-2.0
---

Inspired by [sentosa/ZNV-Embedding](https://huggingface.co/sentosa/ZNV-Embedding): a prompt-engineering approach that aggregates 'title' information into embeddings (with modifications).

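The core idea: each input is wrapped with a fixed instruction prefix and a set of question-style suffixes, and the hidden states at the suffix positions are projected and concatenated into one embedding. Roughly (the strings are taken from the code below; the variable names here are only illustrative):

```python
prefix = "Reading the below text and answer questions:\n"
suffixes = ["\n1.One word to summarize the above text:",
            "\n2.The deeper meaning of the above text:"]

# The model effectively sees: prefix + text + suffix_1 + suffix_2;
# one hidden state is read out per suffix and the projections are concatenated.
prompt = prefix + "Some input text" + "".join(suffixes)
```
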
To do:

1. Re-train the dense layers.

2. Design a more effective concatenation.

3. Adopt AnglE to fine-tune TinyLlama.

4. Improve the loss function (see the sketch after this list).

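For item 4, a natural starting point for embedding models is an in-batch contrastive (InfoNCE-style) loss over cosine similarities of paired texts. This is only a sketch of a candidate objective, not the objective this model was trained with; the anchor/positive pair structure is an assumption:

```python
import torch
import torch.nn.functional as F

def info_nce_loss(anchor_emb: torch.Tensor, positive_emb: torch.Tensor,
                  temperature: float = 0.05) -> torch.Tensor:
    """In-batch contrastive loss: row i of anchor_emb should match row i of
    positive_emb; every other row in the batch acts as a negative.
    Both inputs are (batch, dim) and assumed L2-normalized, as the
    TE model's forward() already guarantees."""
    logits = anchor_emb @ positive_emb.T / temperature  # scaled cosine similarities
    labels = torch.arange(logits.size(0), device=logits.device)
    return F.cross_entropy(logits, labels)
```
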
To run the TE_Embedding model:

```python
import os

import numpy as np
import torch
import torch.nn.functional as F
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer


class TEmbeddingModel(torch.nn.Module):
    def __init__(self, model_name_or_path):
        super().__init__()
        self.prompt_prefix = "Reading the below text and answer questions:\n"
        self.prompt_suffixes = ["\n1.One word to summarize the above text:",
                                "\n2.The deeper meaning of the above text:"]
        self.hidden_size = 2048  # depends on the base model
        self.model_name_or_path = model_name_or_path
        # One projection head per suffix; their outputs are concatenated,
        # so each head maps hidden_size -> hidden_size / num_suffixes.
        self.linear_suffixes = torch.nn.ModuleList(
            [torch.nn.Linear(self.hidden_size, self.hidden_size // len(self.prompt_suffixes))
             for _ in range(len(self.prompt_suffixes))])
        self.tokenizer, self.llama = self.load_llama()
        self.tanh = torch.nn.Tanh()

        # Pre-tokenize the suffixes once; they are appended to every input.
        self.suffixes_ids = []
        self.suffixes_ids_len = []
        self.suffixes_len = 0
        for suffix in self.prompt_suffixes:
            ids = self.tokenizer(suffix, return_tensors="pt")["input_ids"].tolist()[0]
            self.suffixes_ids += ids
            self.suffixes_ids_len.append(len(ids))
            self.suffixes_len += len(ids)

        self.suffixes_ones = torch.ones(self.suffixes_len)
        self.suffixes_ids = torch.tensor(self.suffixes_ids)

        # Load the pretrained projection heads into this module's state dict.
        linear_file = ".//TE//linears"
        load_layers = torch.load(linear_file)
        model_state = self.state_dict()
        model_state.update(load_layers)
        self.load_state_dict(model_state, strict=False)

    def load_llama(self):
        llm_path = os.path.join(self.model_name_or_path)
        config = AutoConfig.from_pretrained(llm_path)
        tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        # Left padding keeps the appended suffixes at fixed offsets
        # from the end of every sequence in the batch.
        tokenizer.padding_side = "left"
        model = AutoModelForCausalLM.from_pretrained(
            llm_path,
            config=config,
            low_cpu_mem_usage=True,
            device_map="auto",
        )
        model.config.use_cache = False

        if tokenizer.pad_token is None:
            tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            model.resize_token_embeddings(len(tokenizer))
        return tokenizer, model

    def forward(self, sentences):
        prompts_embeddings = []
        sentences = [self.prompt_prefix + s for s in sentences]  # prepend the shared prefix
        inputs = self.tokenizer(sentences, max_length=256, padding=True, truncation=True,
                                return_tensors='pt')
        attention_mask = inputs["attention_mask"]
        input_ids = inputs["input_ids"]
        batch_size = len(sentences)
        device = next(self.parameters()).device

        # Append the suffix token ids (and matching attention) to every row.
        suffixes_ones = self.suffixes_ones.unsqueeze(0).repeat(batch_size, 1)
        attention_mask = torch.cat([attention_mask, suffixes_ones], dim=-1).to(device)
        suffixes_ids = self.suffixes_ids.unsqueeze(0).repeat(batch_size, 1)
        input_ids = torch.cat([input_ids, suffixes_ids], dim=-1).to(device)

        last_hidden_state = self.llama.base_model(
            attention_mask=attention_mask, input_ids=input_ids).last_hidden_state

        # Read out one hidden state per suffix, walking backwards from the
        # last token, and project each through its own head.
        index = -1
        for i in range(len(self.suffixes_ids_len)):
            embedding = last_hidden_state[:, index, :]
            embedding = self.linear_suffixes[i](embedding)
            prompts_embeddings.append(embedding)
            index -= self.suffixes_ids_len[-i - 1]

        output_embedding = torch.cat(prompts_embeddings, dim=-1)
        output_embedding = self.tanh(output_embedding)
        output_embedding = F.normalize(output_embedding, p=2, dim=1)
        return output_embedding

    def encode(self, sentences, batch_size=10, **kwargs):
        size = len(sentences)
        embeddings = None
        handled = 0
        while handled < size:
            batch = sentences[handled:handled + batch_size]
            output_embeddings = self.forward(batch)
            result = output_embeddings.detach().cpu().numpy()
            handled += result.shape[0]
            if embeddings is not None:
                embeddings = np.concatenate((embeddings, result), axis=0)
            else:
                embeddings = result
        return embeddings


if __name__ == "__main__":
    # TE_model = TEmbeddingModel("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    TE_model = TEmbeddingModel("technicolor/TE_Tinyllama")
    TE_model.eval()
    with torch.no_grad():
        output = TE_model(["Hello", "Nice to meet you"])
        cos_sim = F.cosine_similarity(output[0], output[1], dim=0)
        print(cos_sim)
```
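
Because `forward` L2-normalizes its output, the rows returned by `encode` are unit vectors, so a plain dot product gives cosine similarity directly. Continuing from the script above (the sentences are only illustrative):

```python
sents = ["The cat sat on the mat.",
         "A feline rested on a rug.",
         "Quarterly revenue grew by 8%."]
embs = TE_model.encode(sents, batch_size=2)  # numpy array, shape (3, 2048)
print(np.round(embs @ embs.T, 3))            # pairwise cosine similarities
```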