Could you share the code in question? I’ll try troubleshooting this for you all this morning.
Made a few updates to a custom CSV knowledge source that requires no additional configuration after being instantiated. Hope this can serve as some additional help or context. This is tested and working:
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
import csv
import hashlib
from typing import Dict, Any
from pydantic import Field
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
from dotenv import load_dotenv
import os
# Load variables from a local .env file so the API key lookup below can
# find AZURE_OPENAI_API_KEY in the process environment.
load_dotenv()
# Create and configure knowledge storage
# This module-level storage is the default used by AzureCSVKnowledgeSource
# when no other storage is passed to its constructor.
# NOTE(review): only the model name and API key are configured here;
# presumably any other Azure settings (endpoint, API version) come from the
# environment — TODO confirm.
csv_storage: KnowledgeStorage = KnowledgeStorage(
    embedder_config={
        "provider": "azure",
        "config": {
            "model": "text-embedding-3-small",
            # os.environ.get returns None when the variable is not set.
            "api_key": os.environ.get("AZURE_OPENAI_API_KEY")
        }
    })
csv_storage.initialize_knowledge_storage()
class AzureCSVKnowledgeSource(BaseKnowledgeSource):
    """Knowledge source that loads a local CSV file and stores it in row-based chunks.

    Instantiating the class immediately reads ``document_path``, groups the
    parsed records into chunks and saves them through ``save_documents``, so
    no further setup call is needed after construction.
    """

    document_path: str = Field(
        description="The relative paths to a document"
    )

    def __init__(self, document_path, storage: KnowledgeStorage = None):
        """Create the source and ingest the CSV right away.

        Args:
            document_path: Path of the CSV file to read.
            storage: Optional storage that replaces the module-level
                ``csv_storage`` default.
        """
        super().__init__(document_path=document_path, storage=csv_storage)
        self.document_path = document_path
        # Compare against None so a caller-supplied storage is never dropped
        # merely because the object happens to evaluate as falsy.
        if storage is not None:
            self.storage = storage
        self.add()

    def _load_rows(self) -> "list[str] | None":
        """Parse the CSV and return one text line per non-empty record.

        Each record's columns are joined with ", ". Returns ``None`` (after
        printing a message) when the file cannot be read or parsed.
        """
        try:
            # newline="" lets the csv module handle line endings itself, so a
            # quoted field containing a line break stays inside one record.
            with open(self.document_path, "r", encoding="utf-8", newline="") as file:
                # Blank lines parse to empty records; skipping them keeps
                # empty strings out of the chunks.
                return [", ".join(record) for record in csv.reader(file) if record]
        except FileNotFoundError:
            print("File not found!")
        except PermissionError:
            print("You don't have permission to access this file.")
        except Exception as e:
            # Best-effort loader: report the problem instead of raising.
            print("An error occurred:", e)
        return None

    def load_content(self) -> Dict[Any, str]:
        """Read the CSV file and return its rows joined into a single string.

        Returns:
            ``{"document_data": <rows joined by newlines>}`` on success, or an
            empty dict when the file could not be read.
        """
        rows = self._load_rows()
        if rows is None:
            return {}
        return {"document_data": "\n".join(rows)}

    def _chunk_csv_rows(self, rows: list[str], rows_per_chunk: int = 20) -> list[str]:
        """Group every ``rows_per_chunk`` rows into one newline-joined text chunk.

        Raises:
            ValueError: If ``rows_per_chunk`` is smaller than 1.
        """
        if rows_per_chunk < 1:
            raise ValueError("rows_per_chunk must be at least 1")
        return [
            "\n".join(rows[start: start + rows_per_chunk])
            for start in range(0, len(rows), rows_per_chunk)
        ]

    def generate_unique_id(self, content: str) -> str:
        """Return the SHA-256 hex digest of ``content``, used as the chunk ID."""
        return hashlib.sha256(content.encode('utf-8')).hexdigest()

    def add(self) -> None:
        """Load the CSV, chunk it by row count, and save the chunks with metadata.

        Does nothing when the file is unreadable or contains no records, so an
        empty chunk is never written to storage.
        """
        # Work on the parsed records directly rather than re-splitting the
        # joined text on "\n", which would cut a record whose field contains
        # a line break into several rows.
        rows = self._load_rows()
        if not rows:
            return

        # You can tune `rows_per_chunk` according to your needs.
        self.chunks = self._chunk_csv_rows(rows, rows_per_chunk=20)
        # OPTIONAL: If you still want character-based chunking, you could use
        # the inherited `_chunk_text` method on the joined text instead.

        # One metadata entry per chunk, in the same order as self.chunks.
        self.metadata = [
            {"id": self.generate_unique_id(chunk)} for chunk in self.chunks
        ]

        # Save documents with the associated metadata
        self.save_documents(metadata=self.metadata)
Given that your env variables and embedder attribute for Crew or Agent are correctly set up, all you have to do is instantiate your knowledge source. Example:
# Instantiate the class defined above; the constructor loads, chunks and stores the file.
# The path is kept from the original example — point it at your own CSV file.
text_data = AzureCSVKnowledgeSource(document_path="./knowledge_and_documentation/ascension.txt")
You can then pass this on to your Crew or Agent(s) as needed. You can alter the class slightly to take in a string instead of a file if needed.
from crewai import Agent, Task, Crew, Process, LLM
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
# Create a knowledge source
content = "Users name is John. He is 30 years old and lives in San Francisco."
# NOTE(review): the traceback posted with this script is raised from this
# constructor call (crew.py line 7): KnowledgeStorage falls back to its
# default OpenAI embedding function here, before the `embedder` passed to
# Crew(...) further down is ever reached.
string_source = StringKnowledgeSource(
    content=content,
)
# Create an LLM with a temperature of 0 to ensure deterministic outputs
llm = LLM(model="llama3", temperature=0)
# Create the agent; the knowledge source itself is attached at the crew
# level below, not on the agent.
agent = Agent(
    role="About User",
    goal="You know everything about the user.",
    backstory="""You are a master at understanding people and their preferences.""",
    verbose=True,
    allow_delegation=False,
    llm=llm,
)
# The {question} placeholder is filled from the `inputs` dict given to kickoff().
task = Task(
    description="Answer the following questions about the user: {question}",
    expected_output="An answer to the question.",
    agent=agent,
)
# Assemble the crew with the knowledge source and an Ollama embedder config.
crew = Crew(
    agents=[agent],
    tasks=[task],
    verbose=True,
    process=Process.sequential,
    knowledge_sources=[string_source],
    embedder={
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text",
            "api_key": ""
        }
    }
)
result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"})
crewai run
Running the Crew
warning: `VIRTUAL_ENV=/Users/ricram2/canada_tax/.venv` does not match the project environment path `.venv` and will be ignored
/Users/ricram2/canada_tax/crewai/tax_crew/.venv/lib/python3.11/site-packages/pydantic/_internal/_config.py:345: UserWarning: Valid config keys have changed in V2:
* 'fields' has been removed
warnings.warn(message, UserWarning)
Traceback (most recent call last):
File "/Users/ricram2/canada_tax/crewai/tax_crew/.venv/bin/run_crew", line 5, in <module>
from tax_crew.main import run
File "/Users/ricram2/canada_tax/crewai/tax_crew/src/tax_crew/main.py", line 5, in <module>
from tax_crew.crew import TaxCrew
File "/Users/ricram2/canada_tax/crewai/tax_crew/src/tax_crew/crew.py", line 7, in <module>
string_source = StringKnowledgeSource(
^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ricram2/canada_tax/crewai/tax_crew/.venv/lib/python3.11/site-packages/pydantic/main.py", line 214, in __init__
validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ricram2/canada_tax/crewai/tax_crew/.venv/lib/python3.11/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 72, in __init__
self._set_embedder_config(embedder_config)
File "/Users/ricram2/canada_tax/crewai/tax_crew/.venv/lib/python3.11/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 195, in _set_embedder_config
else self._create_default_embedding_function()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ricram2/canada_tax/crewai/tax_crew/.venv/lib/python3.11/site-packages/crewai/knowledge/storage/knowledge_storage.py", line 179, in _create_default_embedding_function
return OpenAIEmbeddingFunction(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ricram2/canada_tax/crewai/tax_crew/.venv/lib/python3.11/site-packages/chromadb/utils/embedding_functions/openai_embedding_function.py", line 56, in __init__
raise ValueError(
ValueError: Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys
An error occurred while running the crew: Command '['uv', 'run', 'run_crew']' returned non-zero exit status 1.
The issue comes from the instantiation of the LLM class. Try this, but make sure to modify the model to match what you have locally through Ollama:
Pick one and use it below. Make sure you start with the ollama/{your-model}:
from crewai import Agent, Task, Crew, Process, LLM
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
# Create a knowledge source
content = "Users name is John. He is 30 years old and lives in San Francisco."
string_source = StringKnowledgeSource(content=content)
# Create an LLM with a temperature of 0 to ensure deterministic outputs
# The model name carries the "ollama/" provider prefix followed by the local
# model tag.
llm = LLM(
    model="ollama/llama3.2:latest", # run !ollama list to see models you have
    temperature=0,
    api_key=""
)
# Create the agent; the knowledge source itself is attached at the crew
# level below, not on the agent.
agent = Agent(
    role="About User",
    goal="You know everything about the user.",
    backstory="""You are a master at understanding people and their preferences.""",
    verbose=True,
    allow_delegation=False,
    llm=llm,
)
# The {question} placeholder is filled from the `inputs` dict given to kickoff().
task = Task(
    description="Answer the following questions about the user: {question}",
    expected_output="An answer to the question.",
    agent=agent,
)
# Assemble the crew with the knowledge source and an Ollama embedder config.
crew = Crew(
    agents=[agent],
    tasks=[task],
    verbose=True,
    process=Process.sequential,
    knowledge_sources=[string_source],
    embedder={
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text",
            "api_key": ""
        }
    }
)
result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"})
Nope. I did as you suggested. Same issue. Here is the code:
from crewai import Agent, Task, Crew, Process, LLM
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
# Create a knowledge source
content = "Users name is John. He is 30 years old and lives in San Francisco."
# NOTE(review): the poster reports the same OpenAI-key error for this
# version; in the earlier traceback that error is raised from this
# constructor call — confirm it is still the failing line here.
string_source = StringKnowledgeSource(
    content=content,
)
# Create an LLM with a temperature of 0 to ensure deterministic outputs
# NOTE(review): unlike the suggested snippet, this model name has no
# "ollama/" prefix.
llm = LLM(model="llama3:latest", temperature=0)
# Create the agent; the knowledge source itself is attached at the crew
# level below, not on the agent.
agent = Agent(
    role="About User",
    goal="You know everything about the user.",
    backstory="""You are a master at understanding people and their preferences.""",
    verbose=True,
    allow_delegation=False,
    llm=llm,
)
# The {question} placeholder is filled from the `inputs` dict given to kickoff().
task = Task(
    description="Answer the following questions about the user: {question}",
    expected_output="An answer to the question.",
    agent=agent,
)
# Assemble the crew with the knowledge source and an Ollama embedder config.
crew = Crew(
    agents=[agent],
    tasks=[task],
    verbose=True,
    process=Process.sequential,
    knowledge_sources=[string_source],
    embedder={
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text:latest",
            "api_key": ""
        }
    }
)
result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"})
here is the ollama list
NAME ID SIZE MODIFIED
nomic-embed-text:latest 0a109f422b47 274 MB 6 days ago
llama3:latest 365c0bd3c000 4.7 GB 6 days ago
llama2:latest 78e26419b446 3.8 GB 6 days ago
What error did you get this time around? Also, what system are you using, and are you in a virtual environment?
It is the same error about the OpenAI key. And yes, I am using the project’s own venv.
# LLM configuration pointing at an Ollama server on localhost:11434.
# NOTE(review): `embedder` is passed to LLM(...) here, whereas the earlier
# examples in this thread pass it to Crew(...) — confirm which object
# actually consumes the embedder configuration.
llm = LLM(
    model="ollama/llama3.2:latest",
    base_url="http://localhost:11434",
    temperature=0.2,
    embedder = {
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text:latest",
            "api_key": ""
        }
    }
)
This is how i have configured my LLM
I am using knowledge_sources in my crew
# Crew running both agents' tasks sequentially with a crew-level knowledge source.
# NOTE(review): no `embedder` argument is passed here, unlike the Crew(...)
# calls in the earlier examples of this thread — confirm whether the
# knowledge store then uses its default embedding function.
crew = Crew(
    agents=[node_selector, flow_designer],
    tasks=[select_node, setup_nodes],
    process=Process.sequential,
    verbose=True,
    # planning=True, # Enable planning feature
    knowledge_sources=[
        text_source
    ]
)
I am still getting the following issue; my whole knowledge source is being ignored by the agent while answering:
Failed to init knowledge: Please provide an OpenAI API key.
Are you comfortable sharing your code with me to test?
Please find below
import os
from crewai import Agent, Task, Crew, Process, LLM
from crewai.knowledge.source.crew_docling_source import CrewDoclingSource
# Create a text file knowledge source
text_source = CrewDoclingSource(
    file_paths=[
        "remedy_tickets_info.md",
        "jira_tickets_info.md",
        "translation_service_info.md",
        "named_entity_recognition_service_info.md",
    ]
)

# LLM served by the local Ollama instance. The embedder configuration is not
# passed here: it is given to Crew(...) below, alongside the knowledge
# sources it is meant to embed.
llm = LLM(
    model="ollama/llama3.2:latest",
    base_url="http://localhost:11434",
    temperature=0.2,
)

# Create agents
# Adjacent string literals are concatenated without any separator, so each
# fragment carries its own trailing space/punctuation.
node_selector = Agent(
    role='Node selector',
    goal='Select nodes from a list of nodes to accomplish a given task',
    backstory='You are an expert is selecting nodes. If some task is given to you, you can decide which nodes will be useful '
              'to finish a task. Once you have shortlisted the nodes you give these nodes to flow designer. Flow designer is epert in arranging the nodes '
              'in correct order to sinish the task. remember your job is to select the most appropriate nodes only',
    verbose=True,
    max_iter=2,
    llm=llm,
    # knowledge_sources=[
    #     text_source
    # ]
)

flow_designer = Agent(
    role='Flow designer',
    goal='Based on the nodes given to you, arrange them in certain order and explain how to use them to finish the given task succesfully.',
    backstory='You are an expert is usng nodes given to you by Node Selector. '
              'Once you have received the nodes you know how to arrange them in certain order to finish the task. You think carfully multiple times '
              'to best organise nodes in the most appropriate order.',
    verbose=True,
    max_iter=2,
    llm=llm,
    # knowledge_sources=[
    #     text_source
    # ]
)

# Define tasks
select_node = Task(
    description='Select Nodes to convert a remedy ticket into a servicenow card.',
    expected_output='Give a list of nodes which can be useful to finish this task. In the output only include node names, each node should be '
                    'in a new line. Only one node name should be there in one line. If you have selected multiple nodes as output put each one of there name '
                    'in a new line',
    agent=node_selector
)

setup_nodes = Task(
    description='Node selector has done selection of nodes for you, your job is to arrange these nodes in appropriate order and explain how they cna be used to '
                'convert a remedy ticket into service now card',
    expected_output='Explain the ordering of nodes and how it can be used to achive the task',
    agent=flow_designer,
    output_file='node-setup/remedy-to-servicenow.md',  # Node selection and use will be explained here
    context=[select_node]  # feed the selector's output into this task
)

# Assemble the crew; knowledge sources and their embedder are configured
# together at the crew level so the knowledge store does not fall back to
# the default OpenAI embedding function.
crew = Crew(
    agents=[node_selector, flow_designer],
    tasks=[select_node, setup_nodes],
    process=Process.sequential,
    verbose=True,
    # planning=True, # Enable planning feature
    knowledge_sources=[
        text_source
    ],
    embedder={
        "provider": "ollama",
        "config": {
            "model": "nomic-embed-text:latest",
            "api_key": ""
        }
    }
)

# Execute tasks
crew_output = crew.kickoff()

# Accessing the crew output
print(f"Raw Output: {crew_output.raw}")
print(f"Tasks Output: {crew_output.tasks_output}")
print(f"Token Usage: {crew_output.token_usage}")
print(crew.usage_metrics)
Output:
Try setting your .env info as described here: Quickstart - CrewAI
This is not the issue.