For folks who are on AWS Bedrock and are interested in using Kimi K2 Thinking, the following code seems to work decently well. It is a bit hacky, and I don't think it covers all the edge cases, but the main idea is to wrap the model call with middleware that can parse the "tool calling" text from either the reasoning_content or text content messages.
import json
import re
import uuid
import logging
from typing import Callable, List, Dict, Any, Awaitable
from langchain_core.messages import AIMessage
from langchain.agents.middleware import wrap_model_call, ModelRequest, ModelResponse
logger = logging.getLogger(__name__)
def get_best_tool_match(raw_name: str, available_tool_names: List[str]) -> str:
    """Map a raw (possibly mangled) tool name emitted by Kimi to a known tool.

    Kimi K2 often emits names like ``functions.my_tool:0``; normalize the
    separators to underscores, strip the ``functions_`` prefix, then match
    against the available tools.

    An exact match always wins.  Otherwise, among the tools that match as a
    substring (in either direction), the longest — i.e. most specific — name
    is chosen, so a short tool name (e.g. ``read``) cannot shadow a better
    match (e.g. ``read_file``).

    Returns the matched tool name, or the cleaned raw name if nothing matches.
    """
    clean = raw_name.replace(':', '_').replace('.', '_').replace('functions_', '').strip()
    # Pass 1: an exact match takes absolute priority.
    if clean in available_tool_names:
        return clean
    # Pass 2: loose substring matching; prefer the most specific candidate.
    candidates = [valid for valid in available_tool_names
                  if valid in clean or clean in valid]
    if candidates:
        return max(candidates, key=len)
    # No match: hand back the cleaned name and let the tool layer report it.
    return clean
def parse_kimi_tool_calls(text: str, available_tool_names: List[str]) -> List[Dict[str, Any]]:
    """Extract tool calls embedded as Kimi XML-style tags in ``text``.

    Kimi K2 Thinking on Bedrock sometimes emits tool calls as literal
    ``<|tool_call_begin|>name<|tool_call_argument_begin|>{json}<|tool_call_end|>``
    markup inside text/reasoning content instead of structured tool_use blocks.

    Returns a list of LangChain-style tool-call dicts with ``name``, ``args``
    and ``type`` keys.  Arguments that are not valid JSON are wrapped as
    ``{"input": raw_text}`` so the tool can fail gracefully and surface a
    useful error back to the model.
    """
    # Pattern to match Kimi's XML-style tags (DOTALL: args may span lines).
    pattern = r"<\|tool_call_begin\|>(.*?)<\|tool_call_argument_begin\|>(.*?)<\|tool_call_end\|>"
    calls: List[Dict[str, Any]] = []
    for m in re.finditer(pattern, text, re.DOTALL):
        raw_name = m.group(1).strip()
        raw_args = m.group(2).strip()
        try:
            args = json.loads(raw_args)
        except json.JSONDecodeError:
            # Malformed JSON arguments: pass the raw text through so the
            # tool can report back to the model what went wrong.  (Catching
            # only JSONDecodeError — a bare except would also swallow
            # KeyboardInterrupt/SystemExit.)
            args = {"input": raw_args}
        calls.append({
            "name": get_best_tool_match(raw_name, available_tool_names),
            "args": args,
            "type": "tool_call",
        })
    return calls
@wrap_model_call
async def kimi_k2_adjustment_middleware(
    request: ModelRequest,
    handler: Callable[[ModelRequest], Awaitable[ModelResponse]]
) -> ModelResponse:
    """
    ASYNCHRONOUS middleware that repairs Kimi K2 responses on AWS Bedrock.

    Kimi K2 Thinking sometimes emits tool calls as literal
    ``<|tool_call_begin|>...`` markup inside text or reasoning_content blocks
    instead of proper ``tool_use`` content blocks, which the Bedrock Converse
    API then rejects.  This middleware parses those tags out of the model
    output and rebuilds the AIMessage content/tool_calls so that both Bedrock
    and LangChain's tool routing work.

    Note the 'async def' and 'await handler(request)': this is written for an
    async model call; convert to a regular node if your graph is synchronous.
    """
    # 1. Execute the model call asynchronously.
    response = await handler(request)

    # 2. Only post-process Moonshot/Kimi models; pass everything else through.
    #    getattr chain avoids AttributeError when request.model lacks model_id.
    model_id = (getattr(getattr(request, 'model', None), 'model_id', '') or '').lower()
    if "moonshot" not in model_id and "kimi" not in model_id:
        return response

    ai_msg = response.result[-1] if hasattr(response, 'result') else response.message
    available_tool_names = [t.name for t in request.tools] if request.tools else []
    logger.debug("AI Message before processing:\n%s", ai_msg)

    # Prepare to rebuild content to satisfy the Bedrock Converse API.
    new_content = []
    extracted_calls = []

    # 3. Extract tool calls from the tags, or validate existing ones.
    #    Kimi often mixes reasoning_content and text blocks; the tags can
    #    appear in either, so both are scanned.
    current_content = ai_msg.content if isinstance(ai_msg.content, list) else [{"type": "text", "text": ai_msg.content}]
    for block in current_content:
        if block.get("type") == "reasoning_content":
            # Keep reasoning_content exactly as-is (Bedrock requires it for
            # signatures) but still scan it for embedded tool-call tags.
            new_content.append(block)
            block_text = block.get("reasoning_content", {}).get("text", "")
            if "<|tool_call_begin|>" in block_text:
                extracted_calls.extend(parse_kimi_tool_calls(block_text, available_tool_names))
        elif block.get("type") == "text":
            text = block.get("text", "")
            # If the text block contains the tags, parse them.
            if "<|tool_call_begin|>" in text:
                extracted_calls.extend(parse_kimi_tool_calls(text, available_tool_names))
            # Keep the text block (optionally the tags could be stripped here
            # to prevent double-parsing downstream).
            new_content.append(block)

    # 4. If we found tool calls (or they already existed), we MUST add
    #    'tool_use' blocks to content — this is what Bedrock complains about
    #    when the assistant turn lacks them.  getattr guards against message
    #    types without a tool_calls attribute.
    target_tool_calls = getattr(ai_msg, 'tool_calls', None) or extracted_calls
    if target_tool_calls:
        for tc in target_tool_calls:
            # Generate a consistent ID if one doesn't exist (Bedrock needs toolUseId).
            t_id = tc.get('id') or f"tooluse_{uuid.uuid4().hex[:12]}"
            matched_name = get_best_tool_match(tc['name'], available_tool_names)
            # Add to the content array in the format Bedrock expects.
            new_content.append({
                "type": "tool_use",
                "name": matched_name,
                "input": tc['args'],
                "id": t_id
            })
            # Also update the top-level tool_calls list (for LangChain's
            # internal routing) so both views agree on id and name.
            tc['id'] = t_id
            tc['name'] = matched_name
        ai_msg.tool_calls = target_tool_calls
        ai_msg.content = new_content
        # Mark the message so downstream code can tell it was adjusted.
        if not ai_msg.response_metadata:
            ai_msg.response_metadata = {}
        ai_msg.response_metadata['kimi_k2_adjusted'] = True
        ai_msg.response_metadata['stopReason'] = 'tool_use'
    logger.debug("New Response: %s", response)
    return response
A few notes on use case:
- The middleware above is async in order to work with an async model call (pretty straightforward to change it to a regular node if needed)
- I've been noticing that Kimi K2 Thinking still tends to make many malformed tool calls (e.g., using the tool_id as the name), and I've seen several other failure modes as well. You will need to make sure your tools can fail gracefully and can provide info back to Kimi K2 on how to fix them.
If anyone has a chance to try this out definitely let me know your thoughts