Integrating Nebius AI

Dear All,

Has anyone attempted to integrate Nebius.ai for short-term and entity memory?

If yes, do you have a working embedder_config for RAGStorage?

The issue is this: the ChromaDB collection expects embedding dimension 1536, but the Nebius embedding model returns vectors with 4096 dimensions.

Is there a simpler way to use ChromaDB in an isolated on-prem environment? Without internet access, the default OpenAI embeddings don’t work.

I have the code below — do you have any recommendations?

class NebiusEmbeddings(Embeddings):
    """LangChain-style embeddings adapter for the Nebius AI Studio API.

    The BAAI/bge-en-icl model returns 4096-dimensional vectors; this class
    resizes each vector to ``target_dim`` (truncating or zero-padding) so the
    embeddings fit a ChromaDB collection created for a smaller dimension.

    NOTE(review): truncation/zero-padding discards information and is only a
    stop-gap — configuring the store for the native 4096 dims is preferable.
    """

    def __init__(self, api_key: str, target_dim: int = 1536):
        # Nebius exposes an OpenAI-compatible endpoint, so the stock OpenAI
        # client works with a custom base_url.
        self.client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=api_key,
        )
        self.model = "BAAI/bge-en-icl"
        self.target_dim = target_dim
        self.pca = None  # placeholder for a learned projection (unused here)

    def _resize_embedding(self, embedding: List[float]) -> List[float]:
        """Truncate or zero-pad one embedding to exactly ``target_dim``."""
        if len(embedding) > self.target_dim:
            return embedding[:self.target_dim]  # truncate
        if len(embedding) < self.target_dim:
            return embedding + [0.0] * (self.target_dim - len(embedding))  # pad
        return embedding

    def _resize_embeddings(self, embeddings: List[List[float]]) -> List[List[float]]:
        """Resize every embedding in the batch."""
        return [self._resize_embedding(emb) for emb in embeddings]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of documents and resize the results.

        The embeddings endpoint accepts a list input, so this issues a single
        request instead of one request per text.
        """
        if not texts:
            return []
        response = self.client.embeddings.create(model=self.model, input=texts)
        # Results arrive in input order; resize them all.
        return self._resize_embeddings([item.embedding for item in response.data])

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string and resize the result."""
        response = self.client.embeddings.create(model=self.model, input=text)
        return self._resize_embedding(response.data[0].embedding)

class NebiusEmbeddingFunction(EmbeddingFunction):
    """ChromaDB embedding function that delegates to a NebiusEmbeddings instance."""

    def __init__(self, nebius_embedder):
        # All API interaction lives in the embedder; this class only adapts
        # its interface to ChromaDB's EmbeddingFunction protocol.
        self.nebius_embedder = nebius_embedder

    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` (a string or a list of strings) and return the vectors."""
        # Chroma may pass a single string; normalize to a list first.
        if isinstance(input, str):
            input = [input]
        embeddings = self.nebius_embedder.embed_documents(input)
        return cast(Embeddings, embeddings)


# Build the embedder once and wrap it for ChromaDB.
# TODO(review): load the API key from an environment variable, never source.
nebius_embedder = NebiusEmbeddings(api_key="APIKey")

nebius_embedding_function = NebiusEmbeddingFunction(nebius_embedder)

# The embedding dimension belongs inside the "config" dict (key "dimensions"),
# not as a RAGStorage keyword argument — RAGStorage has no
# `embedding_dimension` parameter.
short_term_mem = ShortTermMemory(
    storage=RAGStorage(
        embedder_config={
            "provider": "custom",
            "config": {
                "embedder": nebius_embedding_function,  # custom callable Chroma invokes
                "dimensions": 4096,  # native size of the Nebius embeddings
            },
        },
        type="short_term",
        path="./memory/shortterm/",
    )
)

Thank you.

The answer turns out to be simple: the dimension should go under the `config` parameter:

# Embedder configuration for RAGStorage: both the custom embedding function
# and its output dimension live under the "config" key.
embedder_cnf = {
    "provider": "custom",
    "config": {
        "embedder": nebius_embedding_function,  # This is the key part
        "dimensions": 4096,  # matches the 4096-dim vectors Nebius returns
    },
}

# Wire the configuration into the short-term memory store.
short_term_mem = ShortTermMemory(
    storage=RAGStorage(
        embedder_config=embedder_cnf,
        type="short_term",
        path="./memory/shortterm/",
    )
)