Getting error when using the knowledge and embedder configuration

I am using a PDF knowledge base.
Crew:

@crew
    def PlantUMLGenCrew(self) -> Crew:
        """Assemble the Plantumlgenerator crew.

        Returns:
            Crew: A crew running ``Process.sequential`` over ``self.agents``
            and ``self.tasks``, with the PlantUML sequence-diagram knowledge
            source attached and the "watson" embedder settings passed in.
        """
        # Documentation on adding knowledge sources to a crew:
        # https://docs.crewai.com/concepts/knowledge#what-is-knowledge
        crew_agents = self.agents  # filled in automatically by the @agent decorator
        crew_tasks = self.tasks  # filled in automatically by the @task decorator

        # Embedder settings handed to the Crew constructor.
        watson_embedder = {
            "provider": "watson",
            "config": {"model": "ibm/slate-125m-english-rtrvr"},
        }

        # Process.hierarchical is an alternative to the sequential process:
        # https://docs.crewai.com/how-to/Hierarchical/
        return Crew(
            agents=crew_agents,
            tasks=crew_tasks,
            process=Process.sequential,
            knowledge_sources=[PlantUML_SqeuenceDiagram_KG_Source],
            embedder=watson_embedder,
            full_output=True,
            verbose=True,
        )


Error:

(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % crewai flow kickoff
Running the Flow
Traceback (most recent call last):
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/bin/kickoff", line 7, in <module>
    from javadesigndocgen.main import kickoff
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/src/javadesigndocgen/main.py", line 16, in <module>
    from .crews.plantumlgenerator.src.plantumlgenerator.crew import Plantumlgenerator
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/src/javadesigndocgen/crews/plantumlgenerator/src/plantumlgenerator/crew.py", line 11, in <module>
    from langchain_ibm.embeddings import WatsonxEmbeddings
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/langchain_ibm/__init__.py", line 1, in <module>
    from langchain_ibm.chat_models import ChatWatsonx
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/langchain_ibm/chat_models.py", line 24, in <module>
    from ibm_watsonx_ai import APIClient, Credentials  # type: ignore
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/__init__.py", line 19, in <module>
    from ibm_watsonx_ai.client import APIClient
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/client.py", line 29, in <module>
    from ibm_watsonx_ai.deployments import Deployments
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/deployments.py", line 44, in <module>
    from ibm_watsonx_ai.utils.autoai.utils import all_logging_disabled
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/utils/autoai/utils.py", line 71, in <module>
    import pandas as pd
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/__init__.py", line 46, in <module>
    from pandas.core.api import (
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/core/api.py", line 1, in <module>
    from pandas._libs import (
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/_libs/__init__.py", line 18, in <module>
    from pandas._libs.interval import Interval
  File "interval.pyx", line 1, in init pandas._libs.interval
ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
An error occurred while running the flow: Command '['uv', 'run', 'kickoff']' returned non-zero exit status 1.

crewai version = 0.86.0

When I don’t use the embedder attribute, it asks for an OpenAI API key.

paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % crewai flow kickoff
Running the Flow
Traceback (most recent call last):
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/bin/kickoff", line 7, in <module>
    from javadesigndocgen.main import kickoff
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/src/javadesigndocgen/main.py", line 16, in <module>
    from .crews.plantumlgenerator.src.plantumlgenerator.crew import Plantumlgenerator
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/src/javadesigndocgen/crews/plantumlgenerator/src/plantumlgenerator/crew.py", line 45, in <module>
    PlantUML_SqeuenceDiagram_KG_Source= PDFKnowledgeSource(
                                        ^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pydantic/main.py", line 214, in __init__
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 51, in __init__
    self._set_embedder_config(embedder_config)
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 174, in _set_embedder_config
    else self._create_default_embedding_function()
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 158, in _create_default_embedding_function
    return OpenAIEmbeddingFunction(
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/crewaiInteg_slc_code_explanation_project/step02_crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/chromadb/utils/embedding_functions/openai_embedding_function.py", line 56, in __init__
    raise ValueError(
ValueError: Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys
An error occurred while running the flow: Command '['uv', 'run', 'kickoff']' returned non-zero exit status 1.

paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % 

I want to use only the Watson embedding model, so kindly help me with this.

@Paarttipaabhalaji I will let the CrewAI staff know about the issue and get back to you. It might be a bug.

Thank you for replying back.

Please, @rokbenko. I am working on a client pilot and can’t implement this feature until this is fixed. It’s a tight deadline, so kindly fix it as soon as possible. I’m waiting for the fix.

@rokbenko I have created the issue in github.

Hi, I’m getting the same issue, trying to use watsonx. I’ve raised an issue here: [BUG] Watsonx as embedder is not working - script errors and stops · Issue #1790 · crewAIInc/crewAI · GitHub

There is another issue in PDFKnowledgeSource:


# PDF file to load as a knowledge source.
PlantUML_SqeuenceDiagram_PDF_Source = PDFKnowledgeSource(
    file_paths=["crewai_Knowledge_Sequence_Diagram.pdf"],
)

# Wrap the PDF source in a Knowledge collection configured with the "watson"
# embedder. base_url, apikey and projId must already be defined in this module.
# NOTE(review): confirm these config key names are the ones the watson
# embedder configuration actually reads.
PlantUML_SqeuenceDiagram_KG_Source = Knowledge(
    collection_name="pdf_knowledge",
    sources=[PlantUML_SqeuenceDiagram_PDF_Source],
    embedder_config=dict(
        provider="watson",
        config=dict(
            model="ibm/slate-125m-english-rtrvr",
            base_url=base_url,
            apikey=apikey,
            projId=projId,
        ),
    ),
)

It’s throwing an error:

(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % crewai flow kickoff
Running the Flow
 
[2025-01-11 10:48:00][ERROR]: File not found: knowledge/src/javadesigndocgen/crews/internaldesigndoc/knowledge/crewai_Knowledge_Sequence_Diagram.pdf. Try adding sources to the knowledge directory. If it's inside the knowledge directory, use the relative path.
Traceback (most recent call last):
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/bin/kickoff", line 7, in <module>
    from javadesigndocgen.main import kickoff
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/src/javadesigndocgen/main.py", line 20, in <module>
    from .crews.internaldesigndoc.src.internaldesigndoc.crew import Internaldesigndoc
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/src/javadesigndocgen/crews/internaldesigndoc/src/internaldesigndoc/crew.py", line 58, in <module>
    PlantUML_SqeuenceDiagram_PDF_Source= PDFKnowledgeSource(
                                         ^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pydantic/main.py", line 214, in __init__
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pydantic/_internal/_model_construction.py", line 126, in wrapped_model_post_init
    original_model_post_init(self, context)
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pydantic/_internal/_model_construction.py", line 126, in wrapped_model_post_init
    original_model_post_init(self, context)
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/source/base_file_knowledge_source.py", line 39, in model_post_init
    self.validate_content()
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/source/base_file_knowledge_source.py", line 56, in validate_content
    raise FileNotFoundError(f"File not found: {path}")
**FileNotFoundError: File not found: knowledge/src/javadesigndocgen/crews/internaldesigndoc/knowledge/crewai_Knowledge_Sequence_Diagram.pdf**
An error occurred while running the flow: Command '['uv', 'run', 'kickoff']' returned non-zero exit status 1.

I have also tried with a relative path. My PDF exists in the knowledge directory.

I’m using a CrewAI flow. The folder structure is:

(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % ls -lrt
total 1520
-rw-r--r--   1 paarttipaa  staff    2472 Jan  7 10:41 README.md
-rw-r--r--   1 paarttipaa  staff       0 Jan  7 10:41 Uml.md
drwxr-xr-x   4 paarttipaa  staff     128 Jan  7 10:41 dist
-rw-r--r--   1 paarttipaa  staff     476 Jan  7 10:41 pyproject-old.toml
-rw-r--r--@  1 paarttipaa  staff    5118 Jan  7 10:41 requirement.txt
drwxr-xr-x   3 paarttipaa  staff      96 Jan  7 10:41 src
-rw-r--r--@  1 paarttipaa  staff    4566 Jan  7 10:41 task1output.txt
-rw-r--r--@  1 paarttipaa  staff    5901 Jan  7 10:41 task2output.txt
-rw-r--r--   1 paarttipaa  staff   73892 Jan  7 10:41 taskoutput.json
drwxr-xr-x   2 paarttipaa  staff      64 Jan  7 10:41 tests
drwxr-xr-x   3 paarttipaa  staff      96 Jan  7 10:45 data
-rw-r--r--   1 paarttipaa  staff     547 Jan  8 09:32 pyproject.toml
-rw-r--r--   1 paarttipaa  staff  662082 Jan  8 09:32 uv.lock
drwxr-xr-x  13 paarttipaa  staff     416 Jan  9 23:31 PlantUMLDocs
(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % 

A knowledge folder is available inside each respective crew.
Internally, by default, the root directory for the PDF file path points to the “knowledge” folder. This throws a file-not-found error.

FileNotFoundError: File not found: knowledge/src/javadesigndocgen/crews/internaldesigndoc/knowledge/crewai_Knowledge_Sequence_Diagram.pdf

Please resolve this issue. I’m really surprised that this error was not caught during testing.

Possible solution:

For a CrewAI flow, there should be one common knowledge folder shared by all the crews, located outside the individual crew directories.
```
(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro trail % ls -lrt
total 16
drwxr-xr-x  3 paarttipaa  staff    96 Jan 11 11:11 src
drwxr-xr-x  3 paarttipaa  staff    96 Jan 11 11:11 Knowledge
drwxr-xr-x  2 paarttipaa  staff    64 Jan 11 11:11 tests
-rw-r--r--  1 paarttipaa  staff   407 Jan 11 11:11 pyproject.toml
-rw-r--r--  1 paarttipaa  staff  2405 Jan 11 11:11 README.md
(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro 
```

Issue: 2

(javadesigndocgen) paarttipaa@Paarttipaabhalajis-MacBook-Pro javadesigndocgen % crewai flow kickoff
Running the Flow
Traceback (most recent call last):
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/bin/kickoff", line 7, in <module>
    from javadesigndocgen.main import kickoff
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/src/javadesigndocgen/main.py", line 20, in <module>
    from .crews.internaldesigndoc.src.internaldesigndoc.crew import Internaldesigndoc
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/src/javadesigndocgen/crews/internaldesigndoc/src/internaldesigndoc/crew.py", line 63, in <module>
    PlantUML_SqeuenceDiagram_KG_Source = Knowledge(
                                         ^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/knowledge.py", line 39, in __init__
    self.storage = KnowledgeStorage(
                   ^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 55, in __init__
    self._set_embedder_config(embedder_config)
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 200, in _set_embedder_config
    EmbeddingConfigurator().configure_embedder(embedder_config)
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/utilities/embedding_configurator.py", line 46, in configure_embedder
    return self.embedding_functions[provider](config, model_name)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/crewai/utilities/embedding_configurator.py", line 150, in _configure_watson
    import ibm_watsonx_ai.foundation_models as watson_models
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/__init__.py", line 19, in <module>
    from ibm_watsonx_ai.client import APIClient
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/client.py", line 29, in <module>
    from ibm_watsonx_ai.deployments import Deployments
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/deployments.py", line 44, in <module>
    from ibm_watsonx_ai.utils.autoai.utils import all_logging_disabled
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/ibm_watsonx_ai/utils/autoai/utils.py", line 71, in <module>
    import pandas as pd
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/__init__.py", line 46, in <module>
    from pandas.core.api import (
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/core/api.py", line 1, in <module>
    from pandas._libs import (
  File "/Users/paarttipaa/ProjectTask/GithubProj/slc_code_explanation_project/SLC_Step02_Crewai/work/crewai/javadesigndocgen/.venv/lib/python3.12/site-packages/pandas/_libs/__init__.py", line 18, in <module>
    from pandas._libs.interval import Interval
  File "interval.pyx", line 1, in init pandas._libs.interval
ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
An error occurred while running the flow: Command '['uv', 'run', 'kickoff']' returned non-zero exit status 1.