Validating that a task actually called a tool

I’m dealing with a lot of hallucination, basically because the task didn’t call the tool it was supposed to. I’m trying to validate whether the results of a task came from a tool call or whether it just made up data. I looked into the guardrails but didn’t find much in regard to looking at previous task run parameters, like what I see in the OTel telemetry.

Is this possible? I ask because I sometimes (but not always) see this in the output:

Status: ✅ Completed
└── 🔧 Used search (1)

Thanks

@crew_193, you might try -

create diag.py with -

import json
from typing import List, Union, Tuple, Dict

from crewai.agents.parser import AgentAction, AgentFinish
from crewai.tools.tool_types import ToolResult

# Running counts of callback invocations; each callback increments its own
# counter and uses it to number the entries it writes to its log file.
agent_call_number = 0
task_call_number = 0


def print_task_output(task_output: Union[str, List[Tuple[Dict, str]]], task_name: str = 'Generic task') -> None:
    """Append one numbered entry describing *task_output* to ``log-task-callback.txt``.

    Each call increments the module-level ``task_call_number`` and writes four
    lines to the log (opened in append mode, relative to the current working
    directory): a 60-column header carrying the call number, the task name,
    the type of ``task_output`` and its string form.

    Args:
        task_output: The value to record; it is logged via ``type()`` and
            ``str()`` only, so any object is accepted at runtime.
        task_name: Label written on the "Task Name" line.
    """
    global task_call_number
    task_call_number += 1
    # Request UTF-8 explicitly: the logged value is arbitrary text and may
    # contain non-ASCII characters, which the platform-default encoding is not
    # guaranteed to be able to encode.
    with open("log-task-callback.txt", "a", encoding="utf-8") as log_file:
        print(f"{f'---CALL {task_call_number}---Unknown task_output format':-<60}", file=log_file)
        print(f"Task Name: {task_name}", file=log_file)
        print(f"type: {type(task_output)}", file=log_file)
        print(f"value: {task_output}", file=log_file)


def print_agent_output(agent_output: Union[str, List[Tuple[Dict, str]], AgentFinish], agent_name: str = 'Generic call') -> None:
    """Append a numbered description of one agent step to ``log-crew-callback.txt``.

    Each call increments the module-level ``agent_call_number`` and appends an
    entry to the log file (opened in append mode, relative to the current
    working directory). What is written depends on the runtime type of
    ``agent_output``:

    * ``str`` -- parsing as JSON is attempted first and the outcome is logged;
      on success the parsed value replaces the string for the checks below.
    * ``list`` whose items are all tuples -- every ``(action, description)``
      pair is logged with the action's ``tool``, ``tool_input`` and ``log``
      attributes (``'Unknown'`` when an attribute is missing).
    * ``AgentFinish`` -- its ``thought`` and the object itself.
    * ``AgentAction`` -- its ``thought``, ``tool``, ``tool_input``, ``text``
      and ``result``.
    * ``ToolResult`` -- its ``result_as_answer`` flag and ``result``.
    * anything else -- its type and string form.

    Args:
        agent_output: The step value to record.
        agent_name: Label written on the "Agent Name" lines.
    """
    global agent_call_number
    agent_call_number += 1
    call_number = agent_call_number

    # Request UTF-8 explicitly: the logged values are arbitrary text and may
    # contain non-ASCII characters, which the platform-default encoding is not
    # guaranteed to be able to encode.
    with open("log-crew-callback.txt", "a", encoding="utf-8") as log_file:

        def emit(line):
            """Write one line to the log."""
            print(line, file=log_file)

        def emit_header(label):
            """Write the entry header, padded with '-' to 60 columns."""
            emit(f"---CALL {call_number}---{label}".ljust(60, "-"))

        # A string may be a JSON document; if so, work with the parsed value.
        if isinstance(agent_output, str):
            try:
                agent_output = json.loads(agent_output)
            except json.JSONDecodeError:
                # Not JSON: keep the original string and fall through.
                emit("agent_output is not in json format")
            else:
                emit("agent_output parsed as json")

        # List of (action, description) tuples. An empty list also matches
        # (all() over nothing is True) and produces a header with no items.
        if isinstance(agent_output, list) and all(isinstance(item, tuple) for item in agent_output):
            emit_header("Dict")
            for action, description in agent_output:
                # The action's attributes are read defensively via getattr.
                emit(f"Agent Name: {agent_name}")
                emit(f"Tool used: {getattr(action, 'tool', 'Unknown')}")
                emit(f"Tool input: {getattr(action, 'tool_input', 'Unknown')}")
                emit(f"Action log: {getattr(action, 'log', 'Unknown')}")
                emit(f"Description: {description}")

        # The agent's final answer.
        elif isinstance(agent_output, AgentFinish):
            emit_header("AgentFinish")
            emit(f"Agent Name: {agent_name}")
            emit(f"Thought: {agent_output.thought}")
            emit(f"AgentFinish: {agent_output}")

        # A tool invocation requested by the agent.
        elif isinstance(agent_output, AgentAction):
            emit_header("AgentAction")
            emit(f"Agent Name: {agent_name}")
            emit(f"Thought: {agent_output.thought}")
            emit(f"Tool: {agent_output.tool}")
            emit(f"Tool_input: {agent_output.tool_input}")
            emit(f"Text: {agent_output.text}")
            emit(f"Result: {agent_output.result}")

        # The value returned by a tool.
        elif isinstance(agent_output, ToolResult):
            emit_header("ToolResult")
            emit(f"Agent Name: {agent_name}")
            emit(f"Result: (as answer={agent_output.result_as_answer}) {agent_output.result}")

        # Anything else: dump the type and value so the format can be identified.
        else:
            emit_header("Unknown agent_output format")
            emit(f"type: {type(agent_output)}")
            emit(f"value: {agent_output}")


if __name__ == "__main__":
    # Smoke test when run as a script: 'test' is a plain string that is not
    # valid JSON, so this writes a "not in json format" line followed by an
    # unknown-format entry to the agent log.
    print_agent_output(agent_output='test', agent_name='test')

and add a step_callback to your agent(s) (remember to `import diag` in that file) -

    @agent
    def data_engineer(self) -> Agent:
        """Build the "data_engineer" agent with step-level diagnostics attached.

        The agent is configured from ``self.agents_config["data_engineer"]``,
        uses ``self.llm`` and ``my_custom_tool``, and passes every value given
        to its ``step_callback`` on to ``diag.print_agent_output`` under the
        label "data engineer". The ``diag`` module must be imported in this
        file for the callback to resolve.
        """
        return Agent(
            config=self.agents_config["data_engineer"],
            llm=self.llm,
            tools=[my_custom_tool],
            # Forward each step to the diagnostic logger so tool calls can be
            # checked in the log afterwards.
            step_callback=lambda x: diag.print_agent_output(x, "data engineer"),
        )

Thank you, I’ll look into this and see what I can find.