Help me understand what I am missing while trying to use a custom LLM with vLLM (loading a model from a local path for offline inference). Here is the full code.
# Install required packages (run in terminal if needed):
# pip install "vllm>=0.6.0" crewai pillow requests huggingface_hub python-dotenv
import os
from crewai import LLM as CrewLLM
from vllm import LLM as VLLM_LLM, SamplingParams
from crewai import Agent, Task, Crew
from PIL import Image
from typing import Any, Dict, List, Optional, Union
from dotenv import load_dotenv
load_dotenv()
# Custom LLM wrapper for CrewAI compatibility
class VLLMWrapper(CrewLLM):
    def __init__(self, vllm_instance: VLLM_LLM, sampling_params: SamplingParams):
        self.vllm = vllm_instance
        self.sampling_params = sampling_params

    def __call__(self, prompt: str, **kwargs) -> str:
        """
        CrewAI expects an LLM to expose a __call__ method that takes a text prompt and returns a string.
        We format the prompt for vLLM's multimodal API.
        """
        # Extract the image path from the prompt (assuming it contains "Image path: <path>")
        image_path = None
        text_prompt = prompt
        if "Image path:" in prompt:
            try:
                start = prompt.index("Image path:") + len("Image path:")
                # Terminate at ". " (period plus space) so the dot in a file
                # extension such as ".jpg" does not truncate the path
                end = prompt.index(". ", start) if ". " in prompt[start:] else len(prompt)
                image_path = prompt[start:end].strip()
                # Remove the image path from the text prompt to avoid duplication
                text_prompt = prompt[:start] + prompt[end:]
            except ValueError:
                text_prompt = prompt

        # Format the prompt for vLLM multimodal input
        formatted_prompt = (
            f"{text_prompt.strip()} <image>{image_path}</image>"
            if image_path and os.path.exists(image_path)
            else text_prompt.strip()
        )

        # Generate a response using vLLM's generate API
        try:
            outputs = self.vllm.generate([formatted_prompt], sampling_params=self.sampling_params)
            return outputs[0].outputs[0].text.strip()
        except Exception as e:
            return f"Error generating response: {str(e)}"
# Set Hugging Face token (if using online model)
# os.environ["HUGGING_FACE_HUB_TOKEN"] = "your_hf_token_here" # Replace with your token
# Model configuration
VLLM_TENSOR_PARALLEL_SIZE = 1
VLLM_GPU_MEMORY_UTILIZATION = 0.85
MAX_MODEL_LEN = 8192
MAX_NUM_SEQS = 2
MAX_GENERATION_TOKENS = 75
# Use local model path or Hugging Face repository
model_path = f"{os.environ.get('model_path', '')}/meta-llama/Llama-3.2-11B-Vision-Instruct"
if not os.path.exists(model_path):
    model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"  # Fall back to the Hugging Face ID
# Initialize VLLM
try:
    vllm_llama = VLLM_LLM(
        model=model_path,
        tensor_parallel_size=VLLM_TENSOR_PARALLEL_SIZE,
        gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
        max_model_len=MAX_MODEL_LEN,
        max_num_seqs=MAX_NUM_SEQS,
        trust_remote_code=True,
        dtype="bfloat16",
        enforce_eager=True,
        limit_mm_per_prompt={"image": 1}
    )
except ValueError as e:
    raise ValueError(f"Failed to initialize vLLM: {e}. Ensure the model path or ID is valid.")
# Define sampling parameters
sampling_params = SamplingParams(
    temperature=0,
    max_tokens=MAX_GENERATION_TOKENS
)
# Create VLLM wrapper for CrewAI
llm = VLLMWrapper(vllm_llama, sampling_params)
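# Hedged aside (assumption about CrewAI internals, not from the original code):
# depending on the installed CrewAI version, the framework invokes the LLM's
# .call(messages) method rather than __call__(prompt), and crewai.LLM.__init__
# expects at least a model name. If the wrapper above is never invoked, a variant
# along these lines may be needed -- check the custom-LLM section of the CrewAI
# docs for your version.
class VLLMCallWrapper(CrewLLM):
    def __init__(self, vllm_instance: VLLM_LLM, sampling_params: SamplingParams):
        super().__init__(model="local-vllm")  # placeholder model name (assumption)
        self.vllm = vllm_instance
        self.sampling_params = sampling_params

    def call(self, messages, **kwargs) -> str:
        # CrewAI may pass either a plain string or a list of chat-style messages
        if isinstance(messages, list):
            prompt = "\n".join(m.get("content", "") for m in messages)
        else:
            prompt = str(messages)
        outputs = self.vllm.generate([prompt], sampling_params=self.sampling_params)
        return outputs[0].outputs[0].text.strip()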
# Load a local image
image_path = "/blob_temp_path/image.jpg"
try:
    Image.open(image_path)
except FileNotFoundError:
    raise FileNotFoundError(f"Image not found at {image_path}. Please verify the path.")
# Create a CrewAI Agent
agent = Agent(
    role="Image Analyst",
    goal="Analyze images and text to provide insights",
    backstory="You are an AI with expertise in vision and language processing.",
    llm=llm,  # Pass the vLLM wrapper
    verbose=True
)
# Define a task with multimodal input
task = Task(
    description=(
        f"Analyze the following image and describe its contents in detail. "
        f"Image path: {image_path}. Additional context: The image is from a recent event."
    ),
    expected_output="A detailed description of the image contents.",
    agent=agent
)
# Create and run the Crew
crew = Crew(
    agents=[agent],
    tasks=[task],
    verbose=True
)
try:
    result = crew.kickoff()
    print("Crew Output:", result)
except Exception as e:
    print(f"Crew execution failed: {e}")