Getting error generating embeddings
I am trying to deploy the instructor embedding using the following:
from typing import Any, List
from InstructorEmbedding import INSTRUCTOR
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.embeddings import BaseEmbedding
class InstructorEmbeddings(BaseEmbedding):
_model: INSTRUCTOR = PrivateAttr()
_instruction: str = PrivateAttr()
def __init__(
self,
instructor_model_name: str = "hkunlp/instructor-large",
instruction: str = "Represent a document for question answer retrieval:",
**kwargs: Any,
) -> None:
self._model = INSTRUCTOR(instructor_model_name)
self._instruction = instruction
super().__init__(**kwargs)
@classmethod
def class_name(cls) -> str:
return "instructor"
async def _aget_query_embedding(self, query: str) -> List[float]:
return self._get_query_embedding(query)
async def _aget_text_embedding(self, text: str) -> List[float]:
return self._get_text_embedding(text)
def _get_query_embedding(self, query: str) -> List[float]:
embeddings = self._model.encode([[self._instruction, query]])
return embeddings[0]
def _get_text_embedding(self, text: str) -> List[float]:
embeddings = self._model.encode([[self._instruction, text]])
return embeddings[0]
def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
embeddings = self._model.encode(
[[self._instruction, text] for text in texts]
)
return embeddings
embed_model = InstructorEmbeddings(embed_batch_size=2)
Settings.embed_model = embed_model
Settings.chunk_size = 512 # Check for 1024
index = VectorStoreIndex.from_documents(documents)
However I am getting the following error:
raise ValidationError([error_], self.class)
pydantic.v1.error_wrappers.ValidationError: 1 validation error for TextNode
embedding
value is not a valid list (type=type_error.list)
Has anyone faced this issue before? and what worked for you?