I am creating an agent, and it was working perfectly fine when I just passed the tools as arguments to the create_agent() method.
class Citation(BaseModel):
    """Citation information for a document."""

    # NOTE: these Field descriptions end up in the generated JSON schema that
    # the structured-output strategy shows the model, so keep them precise.
    document_id: str = Field(description="The unique identifier of the document")
    title: str = Field(description="The title of the document which was cited")
    reference_text: str = Field(description="The text snippet from the document that was referenced")
class QueryResponse(BaseModel):
    """Final user query response with answer and citations"""

    # The agent's free-text answer plus the Citation records that back it up;
    # this is the schema handed to create_agent via response_format.
    answer: str = Field(description="The agent's final answer")
    citations: list[Citation] = Field(description="List of citations used to generate the answer")
class VLMAgent:
    """Wrapper class for the VLM agent.

    Holds the model, tools, checkpointer, and the constructed agent; the
    agent is built lazily on the first ``initialize`` call and reused after.
    """

    def __init__(self):
        # All collaborators are wired in initialize(); None until then.
        self._vlm = None
        self._tools = None
        self._checkpointer = None
        self._embeddings = None
        self._vlm_agent = None

    def initialize(self, embeddings: HuggingFaceEmbeddings, vlm_manager: LanguageModelManager):
        """Build the underlying agent once; later calls are no-ops.

        Args:
            embeddings: Embedding model handed to the vector-search tool.
            vlm_manager: Supplies the main VLM and the guardrails VLM.
        """
        if self._vlm_agent is not None:
            return  # already initialized — keep the existing agent

        self._embeddings = embeddings
        self._vlm = vlm_manager.vlm
        self._checkpointer = get_redis_checkpointer()

        # Static document tools plus a vector-search tool bound to the
        # caller-supplied embeddings instance.
        self._tools = [
            get_documents_metadata,
            get_documents_content,
            create_vector_search_tool(self._embeddings),
        ]

        # Guardrails middleware list from the factory, followed by
        # summarization to keep the conversation inside the context window.
        middleware_stack = create_guardrails_middleware(
            guardrails_llm=vlm_manager.guardrails_vlm,
            max_retries=3,
        )
        middleware_stack = middleware_stack + [
            SummarizationMiddleware(
                model=self._vlm,
                trigger=("messages", 20),  # Trigger earlier to prevent token overflow
                keep=("messages", 4),  # Keep fewer messages to maintain context within limits
            ),
        ]

        # NOTE(review): ToolStrategy appears to make create_agent bind the
        # response schema as a forced tool (tool_choice) alongside the
        # regular tools; the HuggingFace chat model rejects tool_choice when
        # more than one tool is bound ("Received 4 tools"). Confirm the
        # structured-output strategy against the provider in use.
        self._vlm_agent = create_agent(
            self._vlm,
            tools=self._tools,
            response_format=ToolStrategy(QueryResponse),
            context_schema=CustomAgentState,
            system_prompt=system_prompt,
            checkpointer=self._checkpointer,
            middleware=middleware_stack,
        )
Once I also added the response_format schema, I started receiving the following error:
Traceback (most recent call last):
File "/mnt/d/Miscellaneous/Projects/backend_vlm/src/application/chat_service/streaming.py", line 88, in stream_agent_response
for chunk in vlm_agent.stream(
File "/home/alex/venvs/lib/python3.10/site-packages/langgraph/pregel/main.py", line 2633, in stream
for _ in runner.tick(
File "/home/alex/venvs/lib/python3.10/site-packages/langgraph/pregel/_runner.py", line 167, in tick
run_with_retry(
File "/home/alex/venvs/lib/python3.10/site-packages/langgraph/pregel/_retry.py", line 42, in run_with_retry
return task.proc.invoke(task.input, config)
File "/home/alex/venvs/lib/python3.10/site-packages/langgraph/_internal/_runnable.py", line 656, in invoke
input = context.run(step.invoke, input, config, **kwargs)
File "/home/alex/venvs/lib/python3.10/site-packages/langgraph/_internal/_runnable.py", line 400, in invoke
ret = self.func(*args, **kwargs)
File "/home/alex/venvs/lib/python3.10/site-packages/langchain/agents/factory.py", line 1132, in model_node
response = wrap_model_call_handler(request, _execute_model_sync)
File "/home/alex/venvs/lib/python3.10/site-packages/langchain/agents/factory.py", line 146, in normalized_single
result = single_handler(request, handler)
File "/home/alex/venvs/lib/python3.10/site-packages/langchain/agents/middleware/types.py", line 1672, in wrapped
return func(request, handler)
File "/mnt/d/Miscellaneous/Projects/backend_vlm/src/application/chat_service/middlewares.py", line 414, in retry_on_error
raise last_error
File "/mnt/d/Miscellaneous/Projects/backend_vlm/src/application/chat_service/middlewares.py", line 409, in retry_on_error
return handler(request)
File "/home/alex/venvs/lib/python3.10/site-packages/langchain/agents/factory.py", line 1097, in execute_model_sync
model, effective_response_format = _get_bound_model(request)
File "/home/alex/venvs/lib/python3.10/site-packages/langchain/agents/factory.py", line 1074, in _get_bound_model
request.model.bind_tools(
File "/home/alex/venvs/lib/python3.10/site-packages/langchain_huggingface/chat_models/huggingface.py", line 968, in bind_tools
raise ValueError(msg)
ValueError: When specifying tool_choice, you must provide exactly one tool. Received 4 tools.
I tried the same thing with a locally deployed Ollama model and it worked fine: I did not receive any error, and the final response contained both the answer and citations fields.
Is this an issue only with the HuggingFace chat models?