You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I want to use Amazon Bedrock models as a vectorizer, but there is no such vectorizer. How can I use such embedding models?
I have created a custom vectorizer:
import os
from typing import Any, Callable, Dict, List, Optional

import boto3
from langchain_community.embeddings.bedrock import BedrockEmbeddings
from redisvl.utils.vectorize.base import BaseVectorizer
from tenacity import retry, stop_after_attempt, wait_random_exponential
from tenacity.retry import retry_if_not_exception_type
class AmazonBedrockTextVectorizer(BaseVectorizer):
    """Text vectorizer backed by an Amazon Bedrock embedding model.

    Embeddings are produced through LangChain's ``BedrockEmbeddings`` wrapper
    around a boto3 ``bedrock-runtime`` client.

    NOTE: the vector field of the Redis index must be declared with the same
    number of dimensions as this vectorizer's ``dims``. Querying an index
    whose schema was created for a smaller model fails with
    "query vector blob size (...) does not match index's expected size (...)".
    """

    def __init__(
        self, model_id: str = "amazon.titan-embed-text-v1", api_config: Optional[Dict] = None
    ):
        """Create the Bedrock client and probe the model's embedding size.

        Args:
            model_id: Bedrock embedding model identifier.
            api_config: Optional dict that may contain ``aws_access_key_id``,
                ``aws_secret_access_key`` and ``region_name``. Any key that is
                missing falls back to the ``AWS_ACCESS_KEY_ID``,
                ``AWS_SECRET_ACCESS_KEY`` and ``AWS_REGION`` environment
                variables; the region finally defaults to ``us-east-1``.

        Raises:
            ValueError: If no access key / secret key can be found, or if the
                dimension probe against the model fails.
        """
        config = api_config or {}

        # Resolve each setting independently so that a partial api_config
        # (e.g. only region_name) still picks up credentials from the env.
        aws_access_key_id = config.get("aws_access_key_id") or os.getenv(
            "AWS_ACCESS_KEY_ID"
        )
        aws_secret_access_key = config.get("aws_secret_access_key") or os.getenv(
            "AWS_SECRET_ACCESS_KEY"
        )
        region_name = config.get("region_name") or os.getenv(
            "AWS_REGION", "us-east-1"
        )

        if not aws_access_key_id or not aws_secret_access_key:
            raise ValueError("AWS access key and secret key are required.")

        client = boto3.client(
            "bedrock-runtime",
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=region_name,
        )

        dims = self._set_model_dims(model=model_id, client=client)
        super().__init__(model=model_id, dims=dims, client=client)

    @staticmethod
    def _set_model_dims(client: Any, model: str) -> int:
        """Return the embedding length of ``model`` by embedding a probe string.

        Raises:
            ValueError: If the embedding request fails for any reason.
        """
        try:
            probe = BedrockEmbeddings(model_id=model, client=client)
            embedding = probe.embed_query("dimension test")
        except (KeyError, IndexError) as ke:
            raise ValueError(
                f"Unexpected response from the Bedrock API: {str(ke)}"
            ) from ke
        except Exception as e:  # pylint: disable=broad-except
            # fall back (TODO get more specific)
            raise ValueError(
                f"Error setting embedding model dimensions: {str(e)}"
            ) from e
        return len(embedding)

    def _embedding_model(self) -> BedrockEmbeddings:
        """Build the LangChain embeddings wrapper for this vectorizer."""
        return BedrockEmbeddings(model_id=self.model, client=self.client)

    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(6),
        retry=retry_if_not_exception_type(TypeError),
    )
    def embed(
        self,
        text: str,
        preprocess: Optional[Callable] = None,
        as_buffer: bool = False,
        **kwargs,
    ) -> List[float]:
        """Embed a single piece of text.

        Args:
            text: The string to embed.
            preprocess: Optional callable applied to ``text`` before embedding.
            as_buffer: Forwarded to ``_process_embedding`` to select the
                output representation.

        Raises:
            TypeError: If ``text`` is not a ``str`` (not retried).
        """
        if not isinstance(text, str):
            raise TypeError("Must pass in a str value to embed.")

        if preprocess:
            text = preprocess(text)

        embedding = self._embedding_model().embed_query(text)
        return self._process_embedding(embedding, as_buffer)

    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(6),
        retry=retry_if_not_exception_type(TypeError),
    )
    def embed_many(
        self,
        texts: List[str],
        preprocess: Optional[Callable] = None,
        batch_size: int = 10,
        as_buffer: bool = False,
        **kwargs,
    ) -> List[List[float]]:
        """Embed a list of texts in batches.

        Args:
            texts: The strings to embed.
            preprocess: Optional callable applied to each text by ``batchify``.
            batch_size: Number of texts sent per embedding call.
            as_buffer: Forwarded to ``_process_embedding`` to select the
                output representation.
            **kwargs: Accepted for interface compatibility and ignored; in
                particular an ``input_type`` argument is no longer required.

        Raises:
            TypeError: If ``texts`` is not a list of ``str`` (not retried).
        """
        if not isinstance(texts, list):
            raise TypeError("Must pass in a list of str values to embed.")
        if len(texts) > 0 and not isinstance(texts[0], str):
            raise TypeError("Must pass in a list of str values to embed.")

        embedder = self._embedding_model()
        embeddings: List = []
        for batch in self.batchify(texts, batch_size, preprocess):
            # embed_documents is the batch API and returns one vector per
            # input text; embed_query only accepts a single string.
            vectors = embedder.embed_documents(batch)
            embeddings += [
                self._process_embedding(vector, as_buffer) for vector in vectors
            ]
        return embeddings
It is working perfectly and creating vectors.
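But when I check the vector dimensions for the Hugging Face, Amazon Titan, and OpenAI models, I get:
Hugging Face: 768, Amazon Titan: 1536, OpenAI: 1536
I noticed that the Amazon Titan and OpenAI models have the same number of dimensions, and that it is greater than the Hugging Face model's.
I guess that is why it gives this error (6144 bytes corresponds to 1536 values at 4 bytes each, while the index expects 3072 bytes, i.e. 768 values):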
ResponseError: Error parsing vector similarity query: query vector blob size (6144) does not match index's expected size (3072).
When using the default Hugging Face model, it works fine.
The text was updated successfully, but these errors were encountered:
@neeraj-mahapatra Can you show what your index schema looks like (the YAML or dict)? Also, please verify the output of FT.INFO <index-name> (this is the raw Redis command to run on the CLI or within RedisInsight). My guess is that the schema still specifies 768 embedding dimensions while you are using AWS or OpenAI models.
I want to use Amazon Bedrock models as a vectorizer, but there is no such vectorizer. How can I use such embedding models?
I have created a custom vectorizer:
`import os
import boto3
from typing import Callable, Dict, List, Optional
from tenacity import retry, stop_after_attempt, wait_random_exponential
from langchain_community.embeddings.bedrock import BedrockEmbeddings
from tenacity.retry import retry_if_not_exception_type
from redisvl.utils.vectorize.base import BaseVectorizer
class AmazonBedrockTextVectorizer(BaseVectorizer):
It is working perfectly and creating vectors.
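But when I check the vector dimensions for the Hugging Face, Amazon Titan, and OpenAI models, I get:
Hugging Face: 768, Amazon Titan: 1536, OpenAI: 1536
I noticed that the Amazon Titan and OpenAI models have the same number of dimensions, and that it is greater than the Hugging Face model's.
I guess that is why it gives this error (6144 bytes corresponds to 1536 values at 4 bytes each, while the index expects 3072 bytes, i.e. 768 values):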
ResponseError: Error parsing vector similarity query: query vector blob size (6144) does not match index's expected size (3072).
When using the default Hugging Face model, it works fine.
The text was updated successfully, but these errors were encountered: