I finally found the issue.
When the model hallucinates a tool call, LangChain replies that the tool call is invalid and then issues a new request to the model provider.
Because the invalid tool call is still in the messages history, the provider verifies each message, detects the invalid tool call, and then returns the error.
My fix is a middleware that renames all invalid tool calls before the request is sent:
# Mistral requires tool names to match: a-z, A-Z, 0-9, underscores, dashes,
# non-consecutive dots, max 256 chars.
_VALID_TOOL_NAME_RE = re.compile(r"^[a-zA-Z0-9_\-]+(\.[a-zA-Z0-9_\-]+)*$")
_MAX_TOOL_NAME_LEN = 256
def _is_valid_tool_name(name: str) -> bool:
"""Check whether *name* is acceptable as a Mistral function name."""
return (
bool(name)
and len(name) <= _MAX_TOOL_NAME_LEN
and _VALID_TOOL_NAME_RE.match(name) is not None
)
def _build_error_content(tool_call) -> str:
    """Build an informative error message explaining what the LLM tried to do.

    *tool_call* is a tool-call dict; its ``name`` and ``args`` entries are
    read with safe defaults so a partially-formed call cannot raise here.
    """
    name = tool_call.get("name", "<unknown>")
    args = tool_call.get("args", {})
    # Pretty-print the arguments when they are JSON-serializable; fall back
    # to str() for anything json.dumps cannot handle.
    try:
        rendered_args = json.dumps(args, ensure_ascii=False, indent=2)
    except (TypeError, ValueError):
        rendered_args = str(args)
    return (
        f"ERROR: Invalid tool call. The tool name you provided is not valid.\n"
        f"Attempted tool name: {name!r}\n"
        f"Attempted arguments: {rendered_args}\n\n"
        f"Tool names must only contain letters (a-z, A-Z), digits (0-9), "
        f"underscores, dashes, and non-consecutive dots, with a maximum length "
        f"of {_MAX_TOOL_NAME_LEN} characters.\n"
        f"Please retry using one of the tools available to you."
    )
def _sanitize_messages(messages):
    """Sanitize message history *before* sending it to the LLM provider.

    Scans every ``AIMessage`` in the history for tool calls whose ``name``
    field violates the provider naming rules (e.g. Mistral requires
    ``[a-zA-Z0-9_\\-]+(\\.[a-zA-Z0-9_\\-]+)*``, max 256 chars).

    For each invalid tool call found:
    - The ``name`` is replaced with ``invalid_tool_call`` so the provider
      accepts the message.
    - A synthetic ``ToolMessage`` is inserted right after the ``AIMessage``
      (if one is not already present for that call) so the LLM understands
      what went wrong.

    Operating on ``request.messages`` instead of the model response keeps
    the backend and frontend message histories in sync: the frontend never
    sees a tool-call name that differs from what is stored.

    Returns a new list of messages (the original list is not mutated).
    """
    # Hoisted out of the loop: the set of tool-call ids that already have a
    # ToolMessage reply anywhere in the history. The input list is never
    # mutated below, so computing this once is safe (previously it was
    # rebuilt per AIMessage, making the scan accidentally O(n^2)).
    existing_tool_msg_ids = {
        m.tool_call_id for m in messages if isinstance(m, ToolMessage)
    }
    new_messages: list = []
    for msg in messages:
        if not isinstance(msg, AIMessage) or not msg.tool_calls:
            new_messages.append(msg)
            continue
        sanitized_calls = []
        synthetic_tool_messages: list[ToolMessage] = []
        had_invalid = False
        for tc in msg.tool_calls:
            name = tc.get("name", "")
            if _is_valid_tool_name(name):
                sanitized_calls.append(tc)
                continue
            had_invalid = True
            # `or "unknown"` also covers an explicit ``id=None``, which
            # ``.get("id", "unknown")`` would have passed through to
            # ``ToolMessage(tool_call_id=None)``.
            tc_id = tc.get("id") or "unknown"
            logger.warning(
                "Sanitized invalid tool call name=%r (id=%s) in message "
                "history. Replacing with 'invalid_tool_call'.",
                name,
                tc_id,
            )
            # Replace name so the provider accepts the history
            sanitized_calls.append({**tc, "name": "invalid_tool_call"})
            # Prepare a synthetic ToolMessage in case there isn't one
            # already in the history for this call
            synthetic_tool_messages.append(
                ToolMessage(
                    content=_build_error_content(tc),
                    tool_call_id=tc_id,
                )
            )
        if not had_invalid:
            new_messages.append(msg)
            continue
        # Rebuild the AIMessage with sanitized tool_calls. model_copy
        # (LangChain messages are pydantic models) preserves every other
        # field — e.g. usage_metadata, name, invalid_tool_calls — that an
        # explicit AIMessage(...) reconstruction would silently drop.
        # NOTE(review): additional_kwargs may still carry the raw provider
        # payload with the original invalid name — confirm whether the
        # target provider re-validates it.
        new_messages.append(msg.model_copy(update={"tool_calls": sanitized_calls}))
        # Only add synthetic ToolMessages for calls that don't already
        # have a corresponding ToolMessage later in the history
        for tm in synthetic_tool_messages:
            if tm.tool_call_id not in existing_tool_msg_ids:
                new_messages.append(tm)
    return new_messages
class SanitizeToolCallsMiddleware(AgentMiddleware):
    """Middleware that repairs invalid tool-call names in the message
    history *before* the request reaches the LLM provider.

    A model (typically Mistral) can hallucinate a tool name that violates
    the provider's naming rules; replaying that history on the next turn
    then fails with a 400 because the stored ``AIMessage`` carries an
    illegal ``name`` field.

    Sanitizing ``request.messages`` (the input) rather than the model
    response (the output) means the corrected messages are what get written
    to graph state and persisted, keeping backend and frontend in sync.

    For every invalid tool call found in the history:
    - its ``name`` becomes ``invalid_tool_call``;
    - a synthetic ``ToolMessage`` is inserted (unless one already exists)
      so the LLM can see what went wrong and self-correct.
    """

    def wrap_model_call(self, request, handler):
        """Sync hook: sanitize the history, then delegate to *handler*."""
        cleaned = _sanitize_messages(request.messages)
        request.messages = cleaned
        return handler(request)

    async def awrap_model_call(self, request, handler):
        """Async hook: sanitize the history, then delegate to *handler*."""
        cleaned = _sanitize_messages(request.messages)
        request.messages = cleaned
        return await handler(request)