---
license: apache-2.0
---

Inspired by [sentosa/ZNV-Embedding](https://huggingface.co/sentosa/ZNV-Embedding): a prompt-engineering approach that aggregates 'title' information into embeddings (with modifications).

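The core idea: each input is wrapped with a fixed instruction prefix and a set of question-style suffixes, and the hidden states at the suffix positions are projected and concatenated into one embedding. Roughly (the strings are taken from the code below; the variable names here are only illustrative):

```python
prefix = "Reading the below text and answer questions:\n"
suffixes = ["\n1.One word to summarize the above text:",
            "\n2.The deeper meaning of the above text:"]

# The model effectively sees: prefix + text + suffix_1 + suffix_2;
# one hidden state is read out per suffix and the projections are concatenated.
prompt = prefix + "Some input text" + "".join(suffixes)
```
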
To do:

1. Re-train the dense layers.

2. Design a more effective concatenation.

3. Adopt AnglE to fine-tune TinyLlama.

4. Improve the loss function (see the sketch after this list).

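For item 4, a natural starting point for embedding models is an in-batch contrastive (InfoNCE-style) loss over cosine similarities of paired texts. This is only a sketch of a candidate objective, not the objective this model was trained with; the anchor/positive pair structure is an assumption:

```python
import torch
import torch.nn.functional as F

def info_nce_loss(anchor_emb: torch.Tensor, positive_emb: torch.Tensor,
                  temperature: float = 0.05) -> torch.Tensor:
    """In-batch contrastive loss: row i of anchor_emb should match row i of
    positive_emb; every other row in the batch acts as a negative.
    Both inputs are (batch, dim) and assumed L2-normalized, as the
    TE model's forward() already guarantees."""
    logits = anchor_emb @ positive_emb.T / temperature  # scaled cosine similarities
    labels = torch.arange(logits.size(0), device=logits.device)
    return F.cross_entropy(logits, labels)
```
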
To run the TE_Embedding model:

```python
import os

import numpy as np
import torch
import torch.nn.functional as F
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer


class TEmbeddingModel(torch.nn.Module):
    def __init__(self, model_name_or_path):
        super().__init__()
        self.prompt_prefix = "Reading the below text and answer questions:\n"
        self.prompt_suffixes = ["\n1.One word to summarize the above text:",
                                "\n2.The deeper meaning of the above text:"]
        self.hidden_size = 2048  # depends on the base model
        self.model_name_or_path = model_name_or_path
        # One projection head per suffix; their outputs are concatenated,
        # so each head maps hidden_size -> hidden_size / num_suffixes.
        self.linear_suffixes = torch.nn.ModuleList(
            [torch.nn.Linear(self.hidden_size, self.hidden_size // len(self.prompt_suffixes))
             for _ in range(len(self.prompt_suffixes))])
        self.tokenizer, self.llama = self.load_llama()
        self.tanh = torch.nn.Tanh()

        # Pre-tokenize the suffixes once; they are appended to every input.
        self.suffixes_ids = []
        self.suffixes_ids_len = []
        self.suffixes_len = 0
        for suffix in self.prompt_suffixes:
            ids = self.tokenizer(suffix, return_tensors="pt")["input_ids"].tolist()[0]
            self.suffixes_ids += ids
            self.suffixes_ids_len.append(len(ids))
            self.suffixes_len += len(ids)

        self.suffixes_ones = torch.ones(self.suffixes_len)
        self.suffixes_ids = torch.tensor(self.suffixes_ids)

        # Load the pretrained projection heads into this module's state dict.
        linear_file = ".//TE//linears"
        load_layers = torch.load(linear_file)
        model_state = self.state_dict()
        model_state.update(load_layers)
        self.load_state_dict(model_state, strict=False)

    def load_llama(self):
        llm_path = os.path.join(self.model_name_or_path)
        config = AutoConfig.from_pretrained(llm_path)
        tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        # Left padding keeps the appended suffixes at fixed offsets
        # from the end of every sequence in the batch.
        tokenizer.padding_side = "left"
        model = AutoModelForCausalLM.from_pretrained(
            llm_path,
            config=config,
            low_cpu_mem_usage=True,
            device_map="auto",
        )
        model.config.use_cache = False

        if tokenizer.pad_token is None:
            tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            model.resize_token_embeddings(len(tokenizer))
        return tokenizer, model

    def forward(self, sentences):
        prompts_embeddings = []
        sentences = [self.prompt_prefix + s for s in sentences]  # prepend the shared prefix
        inputs = self.tokenizer(sentences, max_length=256, padding=True, truncation=True,
                                return_tensors='pt')
        attention_mask = inputs["attention_mask"]
        input_ids = inputs["input_ids"]
        batch_size = len(sentences)
        device = next(self.parameters()).device

        # Append the suffix token ids (and matching attention) to every row.
        suffixes_ones = self.suffixes_ones.unsqueeze(0).repeat(batch_size, 1)
        attention_mask = torch.cat([attention_mask, suffixes_ones], dim=-1).to(device)
        suffixes_ids = self.suffixes_ids.unsqueeze(0).repeat(batch_size, 1)
        input_ids = torch.cat([input_ids, suffixes_ids], dim=-1).to(device)

        last_hidden_state = self.llama.base_model(
            attention_mask=attention_mask, input_ids=input_ids).last_hidden_state

        # Read out one hidden state per suffix, walking backwards from the
        # last token, and project each through its own head.
        index = -1
        for i in range(len(self.suffixes_ids_len)):
            embedding = last_hidden_state[:, index, :]
            embedding = self.linear_suffixes[i](embedding)
            prompts_embeddings.append(embedding)
            index -= self.suffixes_ids_len[-i - 1]

        output_embedding = torch.cat(prompts_embeddings, dim=-1)
        output_embedding = self.tanh(output_embedding)
        output_embedding = F.normalize(output_embedding, p=2, dim=1)
        return output_embedding

    def encode(self, sentences, batch_size=10, **kwargs):
        size = len(sentences)
        embeddings = None
        handled = 0
        while handled < size:
            batch = sentences[handled:handled + batch_size]
            output_embeddings = self.forward(batch)
            result = output_embeddings.detach().cpu().numpy()
            handled += result.shape[0]
            if embeddings is not None:
                embeddings = np.concatenate((embeddings, result), axis=0)
            else:
                embeddings = result
        return embeddings


if __name__ == "__main__":
    # TE_model = TEmbeddingModel("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    TE_model = TEmbeddingModel("technicolor/TE_Tinyllama")
    TE_model.eval()
    with torch.no_grad():
        output = TE_model(["Hello", "Nice to meet you"])
        cos_sim = F.cosine_similarity(output[0], output[1], dim=0)
        print(cos_sim)
```
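
Because `forward` L2-normalizes its output, the rows returned by `encode` are unit vectors, so a plain dot product gives cosine similarity directly. Continuing from the script above (the sentences are only illustrative):

```python
sents = ["The cat sat on the mat.",
         "A feline rested on a rug.",
         "Quarterly revenue grew by 8%."]
embs = TE_model.encode(sents, batch_size=2)  # numpy array, shape (3, 2048)
print(np.round(embs @ embs.T, 3))            # pairwise cosine similarities
```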