Help me understand what I am missing while trying to use a custom LLM with vLLM (loading a model from a local path for offline inference). Here is the full code.
# Install required packages (run in terminal if needed):
# pip install "vllm>=0.6.0" crewai pillow requests huggingface_hub python-dotenv
import os
from crewai import LLM as CrewLLM
from vllm import LLM as VLLM_LLM, SamplingParams
from crewai import Agent, Task, Crew
from PIL import Image
from typing import Any, Dict, List, Optional, Union
from dotenv import load_dotenv
load_dotenv()
# Custom LLM wrapper for CrewAI compatibility
class VLLMWrapper(CrewLLM):
    def __init__(self, vllm_instance: VLLM_LLM, sampling_params: SamplingParams):
        self.vllm = vllm_instance
        self.sampling_params = sampling_params

    def __call__(self, prompt: str, **kwargs) -> str:
        """
        CrewAI expects an LLM to expose a __call__ method that takes a text prompt and returns a string.
        We format the prompt for vLLM's multimodal API.
        """
        # Extract the image path from the prompt (assuming it contains "Image path: <path>")
        image_path = None
        text_prompt = prompt
        if "Image path:" in prompt:
            try:
                start = prompt.index("Image path:") + len("Image path:")
                # Terminate at ". " (period plus space) so the dot in a file
                # extension such as ".jpg" does not truncate the path
                end = prompt.index(". ", start) if ". " in prompt[start:] else len(prompt)
                image_path = prompt[start:end].strip()
                # Remove the image path from the text prompt to avoid duplication
                text_prompt = prompt[:start] + prompt[end:]
            except ValueError:
                text_prompt = prompt

        # Format the prompt for vLLM multimodal input
        formatted_prompt = (
            f"{text_prompt.strip()} <image>{image_path}</image>"
            if image_path and os.path.exists(image_path)
            else text_prompt.strip()
        )

        # Generate a response using vLLM's generate API
        try:
            outputs = self.vllm.generate([formatted_prompt], sampling_params=self.sampling_params)
            return outputs[0].outputs[0].text.strip()
        except Exception as e:
            return f"Error generating response: {str(e)}"
# Set Hugging Face token (if using online model)
# os.environ["HUGGING_FACE_HUB_TOKEN"] = "your_hf_token_here" # Replace with your token
# Model configuration
VLLM_TENSOR_PARALLEL_SIZE = 1
VLLM_GPU_MEMORY_UTILIZATION = 0.85
MAX_MODEL_LEN = 8192
MAX_NUM_SEQS = 2
MAX_GENERATION_TOKENS = 75
# Use local model path or Hugging Face repository
model_path = f"{os.environ.get('model_path', '')}/meta-llama/Llama-3.2-11B-Vision-Instruct"
if not os.path.exists(model_path):
    model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"  # Fall back to the Hugging Face ID
# Initialize VLLM
try:
    vllm_llama = VLLM_LLM(
        model=model_path,
        tensor_parallel_size=VLLM_TENSOR_PARALLEL_SIZE,
        gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
        max_model_len=MAX_MODEL_LEN,
        max_num_seqs=MAX_NUM_SEQS,
        trust_remote_code=True,
        dtype="bfloat16",
        enforce_eager=True,
        limit_mm_per_prompt={"image": 1}
    )
except ValueError as e:
    raise ValueError(f"Failed to initialize vLLM: {e}. Ensure the model path or ID is valid.")
# Define sampling parameters
sampling_params = SamplingParams(
    temperature=0,
    max_tokens=MAX_GENERATION_TOKENS
)
# Create VLLM wrapper for CrewAI
llm = VLLMWrapper(vllm_llama, sampling_params)
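# Hedged aside (assumption about CrewAI internals, not from the original code):
# depending on the installed CrewAI version, the framework invokes the LLM's
# .call(messages) method rather than __call__(prompt), and crewai.LLM.__init__
# expects at least a model name. If the wrapper above is never invoked, a variant
# along these lines may be needed -- check the custom-LLM section of the CrewAI
# docs for your version.
class VLLMCallWrapper(CrewLLM):
    def __init__(self, vllm_instance: VLLM_LLM, sampling_params: SamplingParams):
        super().__init__(model="local-vllm")  # placeholder model name (assumption)
        self.vllm = vllm_instance
        self.sampling_params = sampling_params

    def call(self, messages, **kwargs) -> str:
        # CrewAI may pass either a plain string or a list of chat-style messages
        if isinstance(messages, list):
            prompt = "\n".join(m.get("content", "") for m in messages)
        else:
            prompt = str(messages)
        outputs = self.vllm.generate([prompt], sampling_params=self.sampling_params)
        return outputs[0].outputs[0].text.strip()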
# Load a local image
image_path = "/blob_temp_path/image.jpg"
try:
    Image.open(image_path)
except FileNotFoundError:
    raise FileNotFoundError(f"Image not found at {image_path}. Please verify the path.")
# Create a CrewAI Agent
agent = Agent(
    role="Image Analyst",
    goal="Analyze images and text to provide insights",
    backstory="You are an AI with expertise in vision and language processing.",
    llm=llm,  # Pass the vLLM wrapper
    verbose=True
)
# Define a task with multimodal input
task = Task(
    description=(
        f"Analyze the following image and describe its contents in detail. "
        f"Image path: {image_path}. Additional context: The image is from a recent event."
    ),
    expected_output="A detailed description of the image contents.",
    agent=agent
)
# Create and run the Crew
crew = Crew(
    agents=[agent],
    tasks=[task],
    verbose=True
)
try:
    result = crew.kickoff()
    print("Crew Output:", result)
except Exception as e:
    print(f"Crew execution failed: {e}")