Error when calling encode
Hello,
I enjoyed reading your research paper.
I am trying to reproduce the code you provided, but I am receiving the following error message.
Would you be able to provide any assistance?
Code where the error occurred:
instruction = (
"Given a web search query, retrieve relevant passages that answer the query:"
)
queries = [
[instruction, "how much protein should a female eat"],
[instruction, "summit define"],
]
q_reps = l2v.encode(queries)
Error
AttributeError Traceback (most recent call last)
Cell In[2], line 9
2 instruction = (
3 "Given a web search query, retrieve relevant passages that answer the query:"
4 )
5 queries = [
6 [instruction, "how much protein should a female eat"],
7 [instruction, "summit define"],
8 ]
----> 9 q_reps = l2v.encode(queries)
File /usr/local/lib/python3.10/dist-packages/llm2vec/llm2vec.py:350, in LLM2Vec.encode(self, sentences, batch_size, show_progress_bar, convert_to_numpy, convert_to_tensor, device)
345 with cuda_compatible_multiprocess.Pool(num_proc) as p:
346 sentences_batches = [
347 sentences_sorted[start_index : start_index + batch_size]
348 for start_index in trange(0, len(sentences), batch_size)
349 ]
--> 350 for result in p.map(
351 partial(
352 self._encode,
353 # This branch only supports CUDA devices, so we ignore the value of device
354 # and let _encode determine it based on rank.
355 device=None,
356 convert_to_numpy=convert_to_numpy,
357 multiprocessing=True,
358 ),
359 sentences_batches,
360 ):
361 all_embeddings.append(result)
363 all_embeddings = torch.cat(all_embeddings, dim=0)
File /usr/lib/python3.10/multiprocessing/pool.py:367, in Pool.map(self, func, iterable, chunksize)
362 def map(self, func, iterable, chunksize=None):
363 '''
364 Apply `func` to each element in `iterable`, collecting the results
365 in a list that is returned.
366 '''
--> 367 return self._map_async(func, iterable, mapstar, chunksize).get()
File /usr/lib/python3.10/multiprocessing/pool.py:774, in ApplyResult.get(self, timeout)
772 return self._value
773 else:
--> 774 raise self._value
File /usr/lib/python3.10/multiprocessing/pool.py:540, in Pool._handle_tasks(taskqueue, put, outqueue, pool, cache)
538 break
539 try:
--> 540 put(task)
541 except Exception as e:
542 job, idx = task[:2]
File /usr/lib/python3.10/multiprocessing/connection.py:206, in _ConnectionBase.send(self, obj)
204 self._check_closed()
205 self._check_writable()
--> 206 self._send_bytes(_ForkingPickler.dumps(obj))
File /usr/lib/python3.10/multiprocessing/reduction.py:51, in ForkingPickler.dumps(cls, obj, protocol)
48 @classmethod
49 def dumps(cls, obj, protocol=None):
50 buf = io.BytesIO()
---> 51 cls(buf, protocol).dump(obj)
52 return buf.getbuffer()
AttributeError: Can't pickle local object 'add_hook_to_module.<locals>.new_forward'
Hi @supark ,
Thanks for your interest in our work. Could you provide the complete code snippet?
The code below works on my 2-GPU system:
from llm2vec import LLM2Vec
import torch
if __name__ == "__main__":
l2v = LLM2Vec.from_pretrained(
"McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
peft_model_name_or_path="McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse",
device_map="cuda" if torch.cuda.is_available() else "cpu",
torch_dtype=torch.bfloat16,
)
instruction = (
"Given a web search query, retrieve relevant passages that answer the query:"
)
queries = [
[instruction, "how much protein should a female eat"],
[instruction, "summit define"],
]
q_reps = l2v.encode(queries)
Hello.
I have resolved the issue I inquired about.
It seems to have been a version issue with some dependent packages.
If possible, it would be helpful if you could specify the package versions used in the repository or sample code.
Thank you.