Hello guys, now that I was able to convert excel files to JSON, I would like to search in it and retreive some specific data for processing. From what I can see, the JSON String is not passed from the first agent to the second and then it hallucinates data.
I’ve tried:
- Passing the JSON as a string
- Saving the JSON to a file, then second agent is using JSONSearchTool (can see chroma db insert in console)
- Saving the JSON to a file, then second agent is using FileReadTool
I can’t see the first agent using this tool.
#crew.py
from crewai import Agent, Crew, Process, Task, LLM
from crewai.project import CrewBase, agent, crew, task
from crewai.agents.agent_builder.base_agent import BaseAgent
from typing import List
from inbtest.tools import ExcelTabToJSONTool
from crewai_tools import FileReadTool
from pydantic import BaseModel
from typing import List
llm = LLM(
model="openai/Llama-xLAM-2-8B-fc-r-F16.gguf",
base_url="xxx",
api_key="xxx",
temperature=0,
top_p=0.95,
max_tokens=150,
top_k=1,
repetition_penalty=1.15,
presence_penalty=0.5
)
xlsxTool = ExcelTabToJSONTool()
@CrewBase
class Inbtest():
"""Inbtest crew"""
agents: List[BaseAgent]
tasks: List[Task]
# Learn more about YAML configuration files here:
# Agents: https://docs.crewai.com/concepts/agents#yaml-configuration-recommended
# Tasks: https://docs.crewai.com/concepts/tasks#yaml-configuration-recommended
# If you would like to add tools to your agents, you can learn more about it here:
# https://docs.crewai.com/concepts/agents#agent-tools
@agent
def ExcelDataConvertor(self) -> Agent:
return Agent(
llm=llm,
config=self.agents_config['ExcelDataConvertor'], # type: ignore[index]
verbose=True,
tools=[ExcelTabToJSONTool(result_as_answer=True)]
)
@agent
def ContainerDataExtractor(self) -> Agent:
return Agent(
llm=llm,
config=self.agents_config['ContainerDataExtractor'], # type: ignore[index]
verbose=True,
)
@task
def ConvertExcelToJsonTask(self) -> Task:
return Task(
config=self.tasks_config['ConvertExcelToJsonTask'], # type: ignore[index]
verbose=True,
)
@task
def extract_specific_container_data_task(self) -> Task:
return Task(
config=self.tasks_config['extract_specific_container_data_task'], # type: ignore[index]
verbose=True,
)
@crew
def crew(self) -> Crew:
"""Creates the Inbtest crew"""
# To learn how to add knowledge sources to your crew, check out the documentation:
# https://docs.crewai.com/concepts/knowledge#what-is-knowledge
return Crew(
llm=llm,
agents=self.agents, # Automatically created by the @agent decorator
tasks=self.tasks, # Automatically created by the @task decorator
process=Process.sequential,
verbose=True,
# process=Process.hierarchical, # In case you wanna use that instead https://docs.crewai.com/how-to/Hierarchical/
)
#agents.yaml
ExcelDataConvertor:
role: >
Excel Data Convertor
goal: >
Convert all excel files in folder {source_folder} to a String having JSON characteristics
backstory: >
You are an expert in Excel processing. Your job is to convert a set of Excel files to a single string that will be used by other agents. This string has a JSON layout utf-8 encoded.
ContainerDataExtractor:
role: Container Data Extractor
goal: >
You receive a string formatted as a JSON representing container records encoded in utf-8.
Parse it yourself, extract exactly these fields from the record where "N. Conteneur" matches the input container_id:
- Reference
- Real promised date BY ROAD
- Real promised date BY RAIL
- Final Request delivery date
Return a JSON object matching this schema:
{
"Reference": string,
"Real_promised_date_BY_ROAD": string,
"Real_promised_date_BY_RAIL": string,
"Final _request_delivery_date": string,
"container_id": string
}
If no matching container is found, respond with:
{"error": "container ID not found", "container_id": string}
backstory: >
You are a logistics AI who processes structured JSON data to extract and format specific container fields.
#tasks.yaml
ConvertExcelToJsonTask:
description: >
Use the ExcelTabToJSONTool to convert all Excel files in the folder '{source_folder}' (sheet '{sheet_name}')
to JSON. You MUST use the tool everytime. The ExcelTabToJSONTool is the ONLY way to perform the task.
It will return the full content of this JSON generated as a string encoded in utf-8.
agent: ExcelDataConvertor
expected_output: >
A string that is representing a JSON encoded in UTF-8, containing all containers retreived using the tool ExcelTabToJSONTool.
extract_specific_container_data_task:
description: |
Given a string containing a list of container records,
parse it as a JSON to find the object where "N. Conteneur" equals '{container_id}'.
From that object, extract the following fields:
- Reference
- Real promised date BY ROAD
- Real promised date BY RAIL
- Final request delivery date
Return a list containing those 4 fields plus the container_id.
input:
json_data: "{{ ConvertExcelToJsonTask.output }}" # should be raw JSON string, not a list or wrapped tool call
container_id: "CONT1231456"
context: [ConvertExcelToJsonTask]
agent: ContainerDataExtractor
expected_output: a list with the four fields and the container_id
Finally, my custom tool ExcelTabToJSONTool
#excel_tab_to_json.py
#Require also __init__.py
#from .excel_tab_to_json import ExcelTabToJSONTool, ExcelTabToJSONInput
#
#__all__ = ["ExcelTabToJSONTool", "ExcelTabToJSONInput"]
from typing import Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
import pandas as pd
import os
import glob
import json
class ExcelTabToJSONInput(BaseModel):
"""Input schema for ExcelTabToJSONTool."""
source_folder: str = Field(..., description="Folder to search for XLSX files.")
sheet_name: str = Field(..., description="Name of the sheet/tab to convert.")
class ExcelTabToJSONTool(BaseTool):
name: str = "Excel Tab to JSON Converter"
description: str = (
"Searches all XLSX files in the specified folder for a given sheet name and writes its JSON to ./knowledge folder. "
"Handles French accents and Unicode characters safely."
)
args_schema: Type[BaseModel] = ExcelTabToJSONInput
def _run(self, source_folder: str, sheet_name: str) -> str:
xlsx_files = glob.glob(os.path.join(source_folder, '*.xlsx'))
if not xlsx_files:
return f"Error: No XLSX files found in {source_folder}."
output_folder = './knowledge'
os.makedirs(output_folder, exist_ok=True)
all_dfs = []
date_format = None # Set to your date format if known, else None
for file in xlsx_files:
try:
xl = pd.ExcelFile(file)
if sheet_name in xl.sheet_names:
df = xl.parse(sheet_name)
# Attempt to parse date/time columns
for col in df.columns:
try:
if date_format:
parsed = pd.to_datetime(df[col], format=date_format, errors='coerce')
else:
parsed = pd.to_datetime(df[col], errors='coerce')
if parsed.notna().any():
non_na = parsed.dropna()
if not non_na.empty and (non_na.dt.time == pd.Timestamp('00:00:00').time()).all():
df[col] = parsed.dt.strftime('%Y-%m-%d')
elif not non_na.empty and (non_na.dt.date == pd.Timestamp('1900-01-01').date()).all():
df[col] = parsed.dt.strftime('%H:%M')
else:
df[col] = parsed.dt.strftime('%Y-%m-%d')
except Exception:
continue # Not a date/time column
all_dfs.append(df)
except Exception as e:
continue # Try next file
if not all_dfs:
return f"Error: Sheet '{sheet_name}' not found in any XLSX file in {source_folder}."
merged_df = pd.concat(all_dfs, ignore_index=True)
json_str = merged_df.to_json(orient='records', force_ascii=False)
# Write merged JSON to a single file
output_path = os.path.join(output_folder, f"mergedxlsx.json")
with open(output_path, 'w', encoding='utf-8') as f:
f.write(json_str)
#print(output_path)
#return output_path
#print(json_str)
return json_str
The tool is working well as far as I saw.
It seems that issue is that task extract_specific_container_data_task doesn’t receive the JSON string from the first agent