Introduction
This model adapts Qwen/Qwen2-1.5B to a downstream reranking task. We build on the FlagEmbedding reranker and implement it with Qwen2-1.5B as the pretrained model.
Dependencies
transformers==4.41.2
flash-attn==2.5.7
Usage
from typing import cast, List, Union, Tuple, Dict, Optional
import numpy as np
import torch
from tqdm import tqdm
import transformers
from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer, DataCollatorWithPadding
from transformers.models.qwen2 import Qwen2Config, Qwen2ForSequenceClassification
from transformers.trainer_pt_utils import LabelSmoother
IGNORE_TOKEN_ID = LabelSmoother.ignore_index
def preprocess(
    sources,
    tokenizer: "transformers.PreTrainedTokenizer",
    max_len: int = 1024,
) -> Dict:
    """Render each source through the chat template and tokenize it.

    Every element of ``sources`` is an iterable of strings (e.g. a
    ``[query, passage]`` pair). Its parts are joined with a blank line, wrapped
    in a single ``user`` message, rendered with the tokenizer's chat template
    (generation prompt included) and tokenized.

    Sequences longer than ``max_len`` are shortened by removing tokens just
    before the trailing template tokens, so the end of the prompt is kept.

    Args:
        sources: Iterable of string sequences, one per example.
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``__call__``.
        max_len: Maximum number of tokens allowed per example.

    Returns:
        A dict with ``input_ids`` and ``attention_mask``, each a list holding
        one (unpadded) list per example.

    Raises:
        ValueError: If an example needs truncation but ``max_len`` is smaller
            than the number of trailing template tokens that must be kept.
    """
    # Trailing tokens preserved on truncation. Per the original note these are:
    # <|im_end|>(151645), \n(198), <|im_start|>(151644), assistant(77091), \n(198)
    tail_len = 5

    input_ids, attention_masks = [], []
    for source in sources:
        messages = [{"role": "user", "content": "\n\n".join(source)}]
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        model_inputs = tokenizer([text])
        input_id = model_inputs['input_ids'][0]
        attention_mask = model_inputs['attention_mask'][0]

        overflow = len(input_id) - max_len
        if overflow > 0:
            if max_len < tail_len:
                # The slice below cannot go under tail_len tokens; fail loudly
                # instead of relying on an assert that `python -O` strips.
                raise ValueError(
                    f"max_len={max_len} is smaller than the {tail_len} trailing "
                    "template tokens that must be preserved"
                )
            # Drop `overflow` tokens located right before the preserved tail.
            cut = -tail_len - overflow
            input_id = input_id[:cut] + input_id[-tail_len:]
            attention_mask = attention_mask[:cut] + attention_mask[-tail_len:]

        input_ids.append(input_id)
        attention_masks.append(attention_mask)

    return dict(
        input_ids=input_ids,
        attention_mask=attention_masks,
    )
class FlagRerankerCustom:
    """Scores (query, passage) pairs with a sequence-classification model.

    The wrapper owns device placement, optional half precision, multi-GPU
    data parallelism and batched inference.
    """

    def __init__(
        self,
        model: PreTrainedModel,
        tokenizer: PreTrainedTokenizer,
        use_fp16: bool = False
    ) -> None:
        """Place ``model`` on the best available device and switch it to eval mode.

        Args:
            model: Model whose output exposes ``.logits``.
            tokenizer: Tokenizer used for prompt rendering and batch padding.
            use_fp16: Convert the model to half precision. Ignored on CPU.
        """
        self.tokenizer = tokenizer
        self.model = model
        self.data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

        # Device preference: CUDA, then Apple MPS, then CPU.
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        elif torch.backends.mps.is_available():
            self.device = torch.device('mps')
        else:
            self.device = torch.device('cpu')
            # Half precision is switched off when running on CPU.
            use_fp16 = False
        if use_fp16:
            self.model.half()

        self.model = self.model.to(self.device)
        self.model.eval()

        self.num_gpus = torch.cuda.device_count()
        if self.num_gpus > 1:
            print(f"----------using {self.num_gpus}*GPUs----------")
            self.model = torch.nn.DataParallel(self.model)

    @torch.no_grad()
    def compute_score(self, sentence_pairs: Union[List[Tuple[str, str]], Tuple[str, str]], batch_size: int = 64,
                      max_length: int = 1024) -> Union[float, List[float]]:
        """Compute a relevance score for each pair.

        Args:
            sentence_pairs: Either one pair of strings or a list of such pairs.
            batch_size: Pairs per forward pass *per GPU*; it is multiplied by
                the number of visible CUDA devices.
            max_length: Maximum number of tokens per rendered pair.

        Returns:
            A single float when exactly one score was produced, otherwise a
            list of floats in input order. An empty input yields ``[]``.

        Raises:
            TypeError: If ``sentence_pairs`` is neither a list nor a tuple.
        """
        if not isinstance(sentence_pairs, (list, tuple)):
            raise TypeError(
                f"sentence_pairs must be a list or tuple, got {type(sentence_pairs).__name__}"
            )
        if not sentence_pairs:
            # Nothing to score; avoids indexing into an empty sequence below.
            return []
        if isinstance(sentence_pairs[0], str):
            # A single (query, passage) pair was passed directly.
            sentence_pairs = [sentence_pairs]

        if self.num_gpus > 0:
            batch_size = batch_size * self.num_gpus

        all_scores = []
        for start_index in tqdm(range(0, len(sentence_pairs), batch_size), desc="Compute Scores",
                                disable=True):
            sentences_batch = sentence_pairs[start_index:start_index + batch_size]
            inputs = preprocess(sources=sentences_batch, tokenizer=self.tokenizer, max_len=max_length)
            # Turn the dict of per-key lists into a list of per-example dicts
            # so the collator can pad them to a common length.
            inputs = [dict(zip(inputs, t)) for t in zip(*inputs.values())]
            inputs = self.data_collator(inputs).to(self.device)
            scores = self.model(**inputs, return_dict=True).logits
            # Remove only the trailing label axis. A bare squeeze() would also
            # collapse the batch axis of a one-item batch into a 0-dim tensor,
            # whose tolist() is a float that cannot be passed to extend().
            # NOTE(review): assumes a single-logit head (num_labels == 1) - confirm.
            scores = scores.squeeze(-1)
            all_scores.extend(scores.detach().to(torch.float).cpu().numpy().tolist())

        if len(all_scores) == 1:
            return all_scores[0]
        return all_scores
# Hub repository that provides the tokenizer, the config and the weights.
model_name = "neofung/LdIR-Qwen2-reranker-1.5B"

tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right")
config = Qwen2Config.from_pretrained(model_name, trust_remote_code=True, bf16=True)
model = Qwen2ForSequenceClassification.from_pretrained(
    model_name,
    config=config,
    trust_remote_code=True,
)

# Wrap the raw classifier so it exposes compute_score().
model = FlagRerankerCustom(model=model, tokenizer=tokenizer, use_fp16=False)

# Two candidates for the same query: an unrelated reply and a relevant passage.
query = 'what is panda?'
pairs = [
    [query, 'hi'],
    [query, 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.'],
]
model.compute_score(pairs)
# Output reported by the model card for the two pairs above:
# [-2.655318021774292, 11.7670316696167]
Evaluation on C-MTEB
from C_MTEB.tasks import *
from mteb import MTEB
# Results are written to a folder named after the model.
save_name = "LdIR-Qwen2-reranker-1.5B"
output_dir = f"reranker_results/{save_name}"

# Select only the reranking tasks for the zh, zh2en and en2zh language settings.
evaluation = MTEB(task_types=["Reranking"], task_langs=['zh', 'zh2en', 'en2zh'])
evaluation.run(model, output_folder=output_dir)
- Downloads last month: 908
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.
Evaluation results
- map on MTEB CMedQAv1 (test set), self-reported: 86.504
- mrr on MTEB CMedQAv1 (test set), self-reported: 88.912
- map on MTEB CMedQAv2 (test set), self-reported: 87.106
- mrr on MTEB CMedQAv2 (test set), self-reported: 89.102
- map on MTEB MMarcoReranking, self-reported: 39.355
- mrr on MTEB MMarcoReranking, self-reported: 39.076
- map on MTEB T2Reranking, self-reported: 68.837
- mrr on MTEB T2Reranking, self-reported: 79.776