I’ve built a CodeUnderstandingCrew using CrewAI to analyze legacy code files and produce structured insights. The crew uses Pydantic models to output clean, detailed information about code functionality, database operations, security concerns, and more.
This is the code:
from crewai import Agent, Crew, Process, Task
from crewai.project import CrewBase, agent, crew, task
from pydantic import BaseModel, Field
from typing import List, Optional, Literal
# Define Pydantic models for structured output
class SecurityConcern(BaseModel):
    """Model for security concerns"""

    # Short label naming the kind of vulnerability.
    type: str = Field(description="Vulnerability type")
    description: str = Field(description="Brief description of the vulnerability")
    # Restricted to the four listed levels; any other value fails validation.
    severity: Literal["Critical", "High", "Medium", "Low"] = Field(description="Severity level")
    # Optional[...] only makes the value nullable. Without an explicit default
    # the field is still *required*, so a finding that simply omits the line
    # number would invalidate the whole structured result. Defaulting to None
    # makes "if applicable" actually optional.
    line_number: Optional[int] = Field(
        default=None,
        description="Line number if applicable",
    )
class DatabaseOperation(BaseModel):
    # One database operation found in the analyzed code. All three fields are
    # required (none declares a default).

    operation: str = Field(
        description="Type of database operation",
    )
    tables: List[str] = Field(
        description="Tables involved",
    )
    purpose: str = Field(
        description="What this operation accomplishes",
    )
class ExternalDependency(BaseModel):
    """Model for external dependencies"""

    # All three fields are required free-form strings.
    name: str = Field(
        description="Name of the dependency",
    )
    type: str = Field(
        description="Type of dependency",
    )
    purpose: str = Field(
        description="What this dependency is used for",
    )
class CodeAnalysis(BaseModel):
    """Structured output for code analysis task"""

    # --- File-level summary -------------------------------------------------
    file_path: str = Field(
        description="Path to the analyzed file",
    )
    file_functionality: str = Field(
        description="Main functionality of the file",
    )
    functions: List[str] = Field(
        description="List of functions/methods identified",
    )

    # --- Nested findings (each item is validated against its own model) -----
    database_operations: List[DatabaseOperation] = Field(
        description="Database operations found",
    )
    security_concerns: List[SecurityConcern] = Field(
        description="Security vulnerabilities identified",
    )
    dependencies: List[ExternalDependency] = Field(
        description="External dependencies identified",
    )

    # --- Overall assessment -------------------------------------------------
    # Integer constrained to the inclusive range 1..10.
    code_quality_score: int = Field(
        ge=1,
        le=10,
        description="1-10 code quality rating",
    )
    integration_points: List[str] = Field(
        description="Integration points with other components",
    )
@CrewBase
class CodeUnderstandingCrew():
    """CodeUnderstandingCrew crew for analyzing legacy code files.

    Agent and task settings are looked up by key ('code_analyzer',
    'analyze_code_structure') in the configuration referenced by
    ``agents_config`` and ``tasks_config``.
    """

    agents_config = 'config/agents.yaml'
    tasks_config = 'config/tasks.yaml'

    @agent
    def code_analyzer(self) -> Agent:
        """Build the agent that performs the code analysis."""
        # NOTE: the structured-output model is deliberately NOT passed here.
        # `output_pydantic` is a Task setting; declaring it on the agent does
        # not shape the result and only obscures where the schema is enforced.
        return Agent(
            config=self.agents_config['code_analyzer'],
            verbose=True,
        )

    @task
    def analyze_code_structure(self) -> Task:
        """Build the analysis task whose result is parsed into CodeAnalysis."""
        return Task(
            config=self.tasks_config['analyze_code_structure'],
            # The task is the single place the output schema is declared.
            output_pydantic=CodeAnalysis,
        )

    @crew
    def crew(self) -> Crew:
        """Creates the CodeUnderstandingCrew crew"""
        return Crew(
            agents=self.agents,
            tasks=self.tasks,
            # Tasks run one after another in declaration order.
            process=Process.sequential,
            verbose=True,
        )
I’m running this crew as part of a larger flow with multiple inputs, and it’s working well—except for the last input, which doesn’t return the Pydantic output as expected. All others work fine.
If you’ve seen this behavior, or have tips on ensuring consistent Pydantic output across multiple inputs in a batch flow, please share how you fixed it.