Getting error when using the knowledge and embedder configuration

I am using a PDF knowledge base (PDFKnowledgeSource).
My crew:

@crew
    def PlantUMLGenCrew(self) -> Crew:
        """Create the Plantumlgenerator crew.

        Builds a sequential Crew from the agents and tasks collected on this
        class, attaches the PlantUML sequence-diagram knowledge source, and
        configures a "watson" embedder.
        """
        # To learn how to add knowledge sources to your crew, check out the documentation:
        # https://docs.crewai.com/concepts/knowledge#what-is-knowledge

        return Crew(
            agents=self.agents, # Automatically created by the @agent decorator
            tasks=self.tasks, # Automatically created by the @task decorator
            process=Process.sequential,
            knowledge_sources=[PlantUML_SqeuenceDiagram_KG_Source],
            # Explicit embedder: without it, the knowledge storage falls back to
            # its default OpenAI embedding function, which requires an OpenAI API key.
            embedder={
                "provider": "watson",
                "config": {"model": "ibm/slate-125m-english-rtrvr"},
            },
            full_output=True,
            verbose=True,
            # process=Process.hierarchical, # In case you want to use that instead: https://docs.crewai.com/how-to/Hierarchical/
        )


Error:

(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % crewai flow kickoff
Running the Flow
Traceback (most recent call last):
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/bin/kickoff", line 7, in <module>
    from javadesigndocgen.main import kickoff
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/src/javadesigndocgen/main.py", line 16, in <module>
    from .crews.plantumlgenerator.src.plantumlgenerator.crew import Plantumlgenerator
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/src/javadesigndocgen/crews/plantumlgenerator/src/plantumlgenerator/crew.py", line 11, in <module>
    from langchain_ibm.embeddings import WatsonxEmbeddings
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/langchain_ibm/__init__.py", line 1, in <module>
    from langchain_ibm.chat_models import ChatWatsonx
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/langchain_ibm/chat_models.py", line 24, in <module>
    from ibm_watsonx_ai import APIClient, Credentials  # type: ignore
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/__init__.py", line 19, in <module>
    from ibm_watsonx_ai.client import APIClient
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/client.py", line 29, in <module>
    from ibm_watsonx_ai.deployments import Deployments
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/deployments.py", line 44, in <module>
    from ibm_watsonx_ai.utils.autoai.utils import all_logging_disabled
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/utils/autoai/utils.py", line 71, in <module>
    import pandas as pd
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/__init__.py", line 46, in <module>
    from pandas.core.api import (
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/core/api.py", line 1, in <module>
    from pandas._libs import (
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/_libs/__init__.py", line 18, in <module>
    from pandas._libs.interval import Interval
  File "interval.pyx", line 1, in init pandas._libs.interval
ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
An error occurred while running the flow: Command '['uv', 'run', 'kickoff']' returned non-zero exit status 1.

crewai version = 0.86.0

When I don’t use the embedder attribute, it asks for an OpenAI API key instead:

paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % crewai flow kickoff
Running the Flow
Traceback (most recent call last):
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/bin/kickoff", line 7, in <module>
    from javadesigndocgen.main import kickoff
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/src/javadesigndocgen/main.py", line 16, in <module>
    from .crews.plantumlgenerator.src.plantumlgenerator.crew import Plantumlgenerator
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/src/javadesigndocgen/crews/plantumlgenerator/src/plantumlgenerator/crew.py", line 45, in <module>
    PlantUML_SqeuenceDiagram_KG_Source= PDFKnowledgeSource(
                                        ^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pydantic/main.py", line 214, in __init__
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 51, in __init__
    self._set_embedder_config(embedder_config)
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 174, in _set_embedder_config
    else self._create_default_embedding_function()
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 158, in _create_default_embedding_function
    return OpenAIEmbeddingFunction(
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/chromadb/utils/embedding_functions/openai_embedding_function.py", line 56, in __init__
    raise ValueError(
ValueError: Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys
An error occurred while running the flow: Command '['uv', 'run', 'kickoff']' returned non-zero exit status 1.

paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % 

I want to use only the Watson embedding model, so kindly help me with this.

@Paarttipaabhalaji I will let the CrewAI staff know about the issue and get back to you. It might be a bug.

Thank you for replying back.

Please, @rokbenko. I am working on a client pilot, and I can’t implement this feature because of this error. It’s a tight deadline, so kindly fix it as soon as possible. I’m waiting for the fix.

@rokbenko I have created an issue on GitHub.

Hi, I’m getting the same issue, trying to use watsonx. I’ve raised an issue here: [BUG] Watsonx as embedder is not working - script errors and stops · Issue #1790 · crewAIInc/crewAI · GitHub

There is another issue, in PDFKnowledgeSource:


# PDF knowledge source for the sequence-diagram reference document.
# NOTE(review): the reported error path starts with "knowledge/", so the loader
# appears to resolve file_paths relative to a "knowledge" directory — confirm.
PlantUML_SqeuenceDiagram_PDF_Source= PDFKnowledgeSource(
    file_paths=["crewai_Knowledge_Sequence_Diagram.pdf"]
)

# Create knowledge with PDF source
PlantUML_SqeuenceDiagram_KG_Source = Knowledge(
    collection_name="pdf_knowledge",
    sources=[PlantUML_SqeuenceDiagram_PDF_Source],
    # Use the "watson" embedding provider; base_url, apikey and projId are
    # defined elsewhere in the reporter's module (not shown in this snippet).
    embedder_config={
    "provider": "watson",
    "config": {
        "model": "ibm/slate-125m-english-rtrvr",
        "base_url": base_url,
        "apikey": apikey,
        "projId":projId
    }
    }
)

It’s throwing an error:

(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % crewai flow kickoff
Running the Flow
 
[2025-01-11 10:48:00][ERROR]: File not found: knowledge/src/javadesigndocgen/crews/internaldesigndoc/knowledge/crewai_Knowledge_Sequence_Diagram.pdf. Try adding sources to the knowledge directory. If it's inside the knowledge directory, use the relative path.
Traceback (most recent call last):
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/bin/kickoff", line 7, in <module>
    from javadesigndocgen.main import kickoff
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/src/javadesigndocgen/main.py", line 20, in <module>
    from .crews.internaldesigndoc.src.internaldesigndoc.crew import Internaldesigndoc
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/src/javadesigndocgen/crews/internaldesigndoc/src/internaldesigndoc/crew.py", line 58, in <module>
    PlantUML_SqeuenceDiagram_PDF_Source= PDFKnowledgeSource(
                                         ^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pydantic/main.py", line 214, in __init__
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pydantic/_internal/_model_construction.py", line 126, in wrapped_model_post_init
    original_model_post_init(self, context)
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pydantic/_internal/_model_construction.py", line 126, in wrapped_model_post_init
    original_model_post_init(self, context)
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/source/base_file_knowledge_source.py", line 39, in model_post_init
    self.validate_content()
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/source/base_file_knowledge_source.py", line 56, in validate_content
    raise FileNotFoundError(f"File not found: {path}")
**FileNotFoundError: File not found: knowledge/src/javadesigndocgen/crews/internaldesigndoc/knowledge/crewai_Knowledge_Sequence_Diagram.pdf**
An error occurred while running the flow: Command '['uv', 'run', 'kickoff']' returned non-zero exit status 1.

I have also tried a relative path. My PDF does exist in the knowledge directory.

I’m using the crewai flow. The folder structure is

(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % ls -lrt
total 1520
-rw-r--r--   1 paarttipaa  staff    2472 Jan  7 10:41 README.md
-rw-r--r--   1 paarttipaa  staff       0 Jan  7 10:41 Uml.md
drwxr-xr-x   4 paarttipaa  staff     128 Jan  7 10:41 dist
-rw-r--r--   1 paarttipaa  staff     476 Jan  7 10:41 pyproject-old.toml
-rw-r--r--@  1 paarttipaa  staff    5118 Jan  7 10:41 requirement.txt
drwxr-xr-x   3 paarttipaa  staff      96 Jan  7 10:41 src
-rw-r--r--@  1 paarttipaa  staff    4566 Jan  7 10:41 task1output.txt
-rw-r--r--@  1 paarttipaa  staff    5901 Jan  7 10:41 task2output.txt
-rw-r--r--   1 paarttipaa  staff   73892 Jan  7 10:41 taskoutput.json
drwxr-xr-x   2 paarttipaa  staff      64 Jan  7 10:41 tests
drwxr-xr-x   3 paarttipaa  staff      96 Jan  7 10:45 data
-rw-r--r--   1 paarttipaa  staff     547 Jan  8 09:32 pyproject.toml
-rw-r--r--   1 paarttipaa  staff  662082 Jan  8 09:32 uv.lock
drwxr-xr-x  13 paarttipaa  staff     416 Jan  9 23:31 PlantUMLDocs
(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % 

The knowledge folder is located inside each respective crew.
Internally, by default, the PDF file path is resolved against a top-level “knowledge” folder. This throws a file-not-found error.

FileNotFoundError: File not found: knowledge/src/javadesigndocgen/crews/internaldesigndoc/knowledge/crewai_Knowledge_Sequence_Diagram.pdf

Please resolve this issue. I am really surprised that this error was not caught during testing.

Possible solution:

For a crewai flow, there should be a single knowledge folder shared by all the crews, located outside the individual crews:
```
(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro trail % ls -lrt
total 16
drwxr-xr-x  3 paarttipaa  staff    96 Jan 11 11:11 src
drwxr-xr-x  3 paarttipaa  staff    96 Jan 11 11:11 Knowledge
drwxr-xr-x  2 paarttipaa  staff    64 Jan 11 11:11 tests
-rw-r--r--  1 paarttipaa  staff   407 Jan 11 11:11 pyproject.toml
-rw-r--r--  1 paarttipaa  staff  2405 Jan 11 11:11 README.md
(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro 
```

Issue: 2

(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % crewai flow kickoff
Running the Flow
Traceback (most recent call last):
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/bin/kickoff", line 7, in <module>
    from javadesigndocgen.main import kickoff
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/src/javadesigndocgen/main.py", line 20, in <module>
    from .crews.internaldesigndoc.src.internaldesigndoc.crew import Internaldesigndoc
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/src/javadesigndocgen/crews/internaldesigndoc/src/internaldesigndoc/crew.py", line 63, in <module>
    PlantUML_SqeuenceDiagram_KG_Source = Knowledge(
                                         ^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/knowledge.py", line 39, in __init__
    self.storage = KnowledgeStorage(
                   ^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 55, in __init__
    self._set_embedder_config(embedder_config)
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 200, in _set_embedder_config
    EmbeddingConfigurator().configure_embedder(embedder_config)
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/utilities/embedding_configurator.py", line 46, in configure_embedder
    return self.embedding_functions[provider](config, model_name)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/utilities/embedding_configurator.py", line 150, in _configure_watson
    import ibm_watsonx_ai.foundation_models as watson_models
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/__init__.py", line 19, in <module>
    from ibm_watsonx_ai.client import APIClient
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/client.py", line 29, in <module>
    from ibm_watsonx_ai.deployments import Deployments
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/deployments.py", line 44, in <module>
    from ibm_watsonx_ai.utils.autoai.utils import all_logging_disabled
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/utils/autoai/utils.py", line 71, in <module>
    import pandas as pd
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/__init__.py", line 46, in <module>
    from pandas.core.api import (
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/core/api.py", line 1, in <module>
    from pandas._libs import (
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/_libs/__init__.py", line 18, in <module>
    from pandas._libs.interval import Interval
  File "interval.pyx", line 1, in init pandas._libs.interval
ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
An error occurred while running the flow: Command '['uv', 'run', 'kickoff']' returned non-zero exit status 1.

@rokbenko @tonykipkemboi This error occurs only in a crewai flow. In a normal crewai run, the embedder works fine. Please look into this watsonx embedder issue.

Is there any update on this? I’m also facing this issue while using the Anthropic API for the crew.