Getting error generating embeddings

#27
by HelloWorld2512 - opened

I am trying to deploy the instructor embedding using the following:

from typing import Any, List
from InstructorEmbedding import INSTRUCTOR

from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.embeddings import BaseEmbedding

class InstructorEmbeddings(BaseEmbedding):
_model: INSTRUCTOR = PrivateAttr()
_instruction: str = PrivateAttr()

def __init__(
    self,
    instructor_model_name: str = "hkunlp/instructor-large",
    instruction: str = "Represent a document for question answer retrieval:",
    **kwargs: Any,
) -> None:
    self._model = INSTRUCTOR(instructor_model_name)
    self._instruction = instruction
    super().__init__(**kwargs)

@classmethod
def class_name(cls) -> str:
    return "instructor"

async def _aget_query_embedding(self, query: str) -> List[float]:
    return self._get_query_embedding(query)

async def _aget_text_embedding(self, text: str) -> List[float]:
    return self._get_text_embedding(text)

def _get_query_embedding(self, query: str) -> List[float]:
    embeddings = self._model.encode([[self._instruction, query]])
    return embeddings[0]

def _get_text_embedding(self, text: str) -> List[float]:
    embeddings = self._model.encode([[self._instruction, text]])
    return embeddings[0]

def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
    embeddings = self._model.encode(
        [[self._instruction, text] for text in texts]
    )
    return embeddings

embed_model = InstructorEmbeddings(embed_batch_size=2)

Settings.embed_model = embed_model
Settings.chunk_size = 512 # Check for 1024

index = VectorStoreIndex.from_documents(documents)

However I am getting the following error:

raise ValidationError([error_], self.class)
pydantic.v1.error_wrappers.ValidationError: 1 validation error for TextNode
embedding
value is not a valid list (type=type_error.list)

Has anyone faced this issue before? and what worked for you?

Sign up or log in to comment