String Knowledge sources not working with Gemini

Hey guys,

Has anyone tried using StringKnowledgeSource with Gemini? It looks like there is a bug: although the Gemini model is configured correctly, the knowledge source always tries to create the default embedder, which looks for an OpenAI API key and fails.

../../.venv/lib/python3.12/site-packages/crewai/project/crew_base.py:26: in __init__
    super().__init__(*args, **kwargs)
hr_slack_bot/crew.py:46: in __init__
    self.string_source = StringKnowledgeSource(
../../.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py:51: in __init__
    self._set_embedder_config(embedder_config)
../../.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py:174: in _set_embedder_config
    else self._create_default_embedding_function()
../../.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py:158: in _create_default_embedding_function
    return OpenAIEmbeddingFunction(

Any solution or workaround is welcome.

Thanks.

@subbu It’s not a bug. CrewAI uses the OpenAI embedding LLM by default, and that’s why the code searched for the OpenAI API key that couldn’t be found. However, you can customize the embedding LLM to your liking by configuring the embedder for the knowledge store. See the docs.

# ...

# Knowledge source built from a plain string.
string_source = StringKnowledgeSource(
    content="Users name is John. He is 30 years old and lives in San Francisco.",
)

# The embedder is configured on the Crew. This example keeps OpenAI as the
# provider and only pins the embedding model; `...` stands for the remaining
# Crew arguments, which are omitted here.
crew = Crew(
    ...,
    knowledge_sources=[string_source],
    embedder={
        "provider": "openai", # Set the embedding provider here
        "config": {"model": "text-embedding-3-small"}, # Set the embedding model here
    },
)

# ...

Thanks for the quick reply, @rokbenko. Actually, I wanted to use the Google embedder.

My code looks like this:

 crew = Crew(
            ....,
            verbose=True,
            knowledge_sources=[self.string_source],
            embedder={
                "provider": "google",
                "config": {"model": "models/embedding-001"},
            },
        )

For this code, I’m getting this error:

  File "./.venv/lib/python3.12/site-packages/crewai/project/crew_base.py", line 26, in __init__
    super().__init__(*args, **kwargs)
  File "./src/demo_crew/crew.py", line 31, in __init__
    self.string_source = StringKnowledgeSource(
                         ^^^^^^^^^^^^^^^^^^^^^^
  File "./.venv/lib/python3.12/site-packages/pydantic/main.py", line 214, in __init__
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "./.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 51, in __init__
    self._set_embedder_config(embedder_config)
  File "./.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 174, in _set_embedder_config
    else self._create_default_embedding_function()
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "./.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 158, in _create_default_embedding_function
    return OpenAIEmbeddingFunction(
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "./.venv/lib/python3.12/site-packages/chromadb/utils/embedding_functions/openai_embedding_function.py", line 56, in __init__
    raise ValueError(
ValueError: Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys

What I suspect is that, irrespective of the provider passed, the knowledge store always tries to create the default embedder.

Can you try with the above embedder config and let me know your results, please?

@subbu Can you please share your full code?

Here you go

crew.py

from crewai import Agent, Crew, Process, Task
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
from crewai.project import CrewBase, agent, crew, task
from pydantic import BaseModel


class Response(BaseModel):
    # Pydantic model with a single required string field; hr_task in this
    # file passes it to Task as output_json.
    content: str


@CrewBase
class HrSlackBotCrew():
    """HrSlackBot crew"""

    def __init__(self, knowledge: str | None = None) -> None:
        """Optionally wrap *knowledge* in a StringKnowledgeSource.

        Args:
            knowledge: Text to expose to the crew as a knowledge source.
                When omitted or empty, no knowledge source is created and
                ``self.string_source`` is set to ``None``.
        """
        if knowledge:
            # NOTE: the source is constructed here, before hr_crew() hands any
            # embedder settings to Crew. The tracebacks in this thread show the
            # OpenAI-key ValueError being raised from this constructor call.
            self.string_source = StringKnowledgeSource(
                content=knowledge,
            )
        else:
            self.string_source = None

    @agent
    def hr_agent(self) -> Agent:
        """Build the HR agent from the 'hr_agent' entry of agents_config."""
        # agents_config is not defined in this class; presumably supplied by
        # CrewBase -- TODO confirm.
        return Agent(
            config=self.agents_config['hr_agent'],
        )

    @task
    def hr_task(self) -> Task:
        """Build the HR task from the 'hr_task' entry of tasks_config."""
        # tasks_config is not defined in this class; presumably supplied by
        # CrewBase -- TODO confirm.
        return Task(
            config=self.tasks_config['hr_task'],
            output_json=Response,
        )

    @crew
    def hr_crew(self) -> Crew:
        """Creates the HR Agent crew"""
        return Crew(
            agents=[self.hr_agent()],  # Automatically created by the @agent decorator
            tasks=[self.hr_task()],  # Automatically created by the @task decorator
            process=Process.sequential,
            verbose=True,
            # Only pass a knowledge source when one was built in __init__.
            knowledge_sources=[self.string_source] if self.string_source else None,
            # NOTE(review): an earlier snippet in this thread used the key
            # "model" rather than "model_name" -- confirm which key the
            # google provider expects.
            embedder={
                "provider": "google",
                "config": {
                    "model_name": "models/embedding-001"
                }
            }
        )

1. Execution without Knowledge

def run():
    """Kick off the HR crew with a fixed sample query and no knowledge text."""
    payload = {"user": "Subbu", "query": "Who is John?", "context": "Subbu: Hi, good morning."}
    bot = HrSlackBotCrew()
    bot.hr_crew().kickoff(inputs=payload)

Output:

# Agent: HR Associate
## Final Answer: 
```json
{
  "content": "Hi Subbu! 👋 Good morning to you too!\n\nI'm afraid I don't have access to personal information about other employees.  It's important to protect everyone's privacy. 😊\n\nIf you have any HR policy questions, I'd be happy to help!  For this particular question, you might want to ask someone else.  Let me know if there's anything else I can assist you with!"
}
```

2. Execution with Knowledge

def run():
    """Kick off the HR crew with a fixed sample query and a one-line knowledge text."""
    payload = {"user": "Subbu", "query": "Who is John?", "context": "Subbu: Hi, good morning."}
    bot = HrSlackBotCrew("Employee name is John. He is 30 years old and lives in San Francisco.")
    bot.hr_crew().kickoff(inputs=payload)

Error:

File "/crewai/hr_slack_bot/.venv/lib/python3.12/site-packages/crewai/project/crew_base.py", line 26, in __init__
    super().__init__(*args, **kwargs)
  File "/crewai/hr_slack_bot/src/hr_slack_bot/crew.py", line 24, in __init__
    self.string_source = StringKnowledgeSource(
                         ^^^^^^^^^^^^^^^^^^^^^^
  File "/crewai/hr_slack_bot/.venv/lib/python3.12/site-packages/pydantic/main.py", line 214, in __init__
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/crewai/hr_slack_bot/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 51, in __init__
    self._set_embedder_config(embedder_config)
  File "/crewai/hr_slack_bot/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 174, in _set_embedder_config
    else self._create_default_embedding_function()
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/crewai/hr_slack_bot/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 158, in _create_default_embedding_function
    return OpenAIEmbeddingFunction(
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/crewai/hr_slack_bot/.venv/lib/python3.12/site-packages/chromadb/utils/embedding_functions/openai_embedding_function.py", line 56, in __init__
    raise ValueError(
ValueError: Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys
An error occurred while running the crew: Command '['uv', 'run', 'run_crew']' returned non-zero exit status 1.

@subbu Will let the CrewAI staff know about the issue and get back to you. It might be a bug.

1 Like

I’m having the same issue here. Whatever I try in order to fix it, nothing solves it in the end.

1 Like

Hi @rokbenko, can I get an update on this issue, please?

@subbu They couldn’t reproduce the error. After that, the conversation died. I pinged CrewAI staff about it.

@rokbenko The issue is easily reproducible with the code shared above. Let me know if any more details are required; I’m more than happy to help the team reproduce the issue. Thanks.

I am having the same issue. Using the code from the guide in the documentation throws the ValueError if I use any embedder other than OpenAI.

crewai version is 0.85.0

1 Like

I also get the same issue using Amazon Bedrock…


	@crew
	def crew(self) -> Crew:
		"""Build the Testone crew with a string knowledge source and a Bedrock embedder."""
		john_facts = StringKnowledgeSource(
			content="Users name is John. He is 30 years old and lives in San Francisco.",
		)
		# Same provider/config mapping as before, spelled as dict literals.
		bedrock_embedder = {
			"provider": "bedrock",
			"config": {
				"model": "bedrock/amazon.titan-embed-text-v2:0",
				"region": "us-east-1",
			},
		}
		return Crew(
			agents=self.agents,
			tasks=self.tasks,
			process=Process.sequential,
			verbose=True,
			knowledge_sources=[john_facts],
			embedder=bedrock_embedder,
		)



  File "/root/development/crewai-review/testone/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 51, in __init__
    self._set_embedder_config(embedder_config)
  File "/root/development/crewai-review/testone/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 174, in _set_embedder_config
    else self._create_default_embedding_function()
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/development/crewai-review/testone/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 158, in _create_default_embedding_function
    return OpenAIEmbeddingFunction(
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/development/crewai-review/testone/.venv/lib/python3.12/site-packages/chromadb/utils/embedding_functions/openai_embedding_function.py", line 56, in __init__
    raise ValueError(
ValueError: Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys

crewai version: 0.86.0
1 Like

I came across this issue in GH

Not sure how the issue got closed without any fix.
@rokbenko could you help to re-open the issue? Thanks.

No. I’m not part of CrewAI staff. I pinged them that the issue hasn’t been solved, but there has been no response so far.

I did some more digging.
CrewAI uses embedchain.ai under the covers for all embedding.
In my case (AWS Bedrock), assigning the embedder to an Agent works when it is configured according to embedchain.ai. There, the provider’s name is aws_bedrock, not bedrock (which is the name given in the CrewAI docs).

So, the following code works for me.


from crewai import Agent, Crew, Process, Task
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
from crewai.project import CrewBase, agent, crew, task


@CrewBase
class Red01():
	"""Red01 crew"""
	# Config locations given as path strings; the methods below index these
	# attributes by key, so presumably CrewBase loads the YAML -- TODO confirm.
	agents_config = 'config/agents.yaml'
	tasks_config = 'config/tasks.yaml'

	@agent
	def researcher(self) -> Agent:
		"""Build the researcher agent with its own knowledge source and embedder."""

		content = "Users name is freddy12. He is 32 years old and lives in Lincon,UK."
		string_source = StringKnowledgeSource(
			content=content,
			metadata={"source": "user"},
		)
		# Knowledge source and embedder are attached to the Agent here, using
		# the provider name "aws_bedrock".
		return Agent(
			config=self.agents_config['researcher'],
			verbose=True,
			knowledge_sources=[string_source],
			embedder={
				"provider": "aws_bedrock",
				"config": {"model": "amazon.titan-embed-text-v2:0"},
			},
		)

	@task
	def research_task(self) -> Task:
		"""Build the research task from the 'research_task' config entry."""
		return Task(
			config=self.tasks_config['research_task'],
		)


	@crew
	def crew(self) -> Crew:
		"""Creates the Red01 crew"""

		# string_source is only consumed by the commented-out kwargs below; it
		# is kept so those lines can be re-enabled as-is.
		content = "Users name is freddy12. He is 32 years old and lives in Lincon,UK."
		string_source = StringKnowledgeSource(
			content=content,
			metadata={"source": "user"},
		)	
			
		return Crew(
			agents=self.agents,
			tasks=self.tasks,
			process=Process.sequential,
			verbose=True,
			# Crew-level knowledge/embedder settings, deliberately left disabled:
			# knowledge_sources=[string_source],
			# embedder={
			# 	"provider": "aws_bedrock",
			# 	"config": {"model": "amazon.titan-embed-text-v2:0"},
			# },
		)

However, uncommenting the commented-out lines in the crew function fails, because CrewAI appears to validate the provider name against an internal list that contains bedrock rather than aws_bedrock.

For Bedrock, make sure you add Boto3 by running:

uv add boto3

and you need a MODEL= env variable too.

The docs will be updated on this as well.

Hi @subbu - Just catching up to this now. I will test this today and update you.

1 Like

@Mike_Watson, how are you setting your keys in the .env file?

Using the default profile in .env… Boto3 is installed.

Hey @tonykipkemboi, any update about this issue?