Hi! I was wondering what the suggested approach is for having multiple model calls in a node when I only want to yield the messages of one of those models to the stream. In my setup, following the langmem tutorial, I make a summarization call before the actual model call. Since I am using streamMode: "messages", every model result gets streamed to my client, including the one produced inside summarize_messages. As a result, the intermediate message from the summarization model call is added to the message stream and the React useStream client displays it. However, I want to keep that message hidden, since it is not part of the actual message list. I also tried moving the summarization into a separate node, but that doesn't solve the problem either, because with the React SDK I can't filter by node or tags. I'm not sure how to deal with this without writing my own client implementation, so any advice or a pointer to relevant documentation would be highly appreciated.
langgraph server:
from datetime import datetime
from typing import Annotated
from langchain.chat_models import init_chat_model
from langchain_core.messages import (
    BaseMessage,
    SystemMessage,
)
from langgraph.graph import START, StateGraph, add_messages
from langgraph.prebuilt import ToolNode, tools_condition
from langmem.short_term import RunningSummary, summarize_messages
from typing_extensions import TypedDict
from graphs.config import GraphConfig
from graphs.prompts import (
    SYSTEM_PROMPT,
)
from utils.utils import get_model_name
class State(TypedDict):
    messages: Annotated[list[BaseMessage], add_messages]
    summary: RunningSummary | None
tools = [...]
def call_model(state: State, config: GraphConfig) -> State:
    system_prompt = config.get("configurable", {}).get(
        "system_prompt",
        SYSTEM_PROMPT.format(current_date=datetime.now().strftime("%Y-%m-%d")),
    )
    system_prompt_message = SystemMessage(content=system_prompt)
    messages = state["messages"]
    summary_model = init_chat_model(
        model="google_genai:gemini-2.5-flash",
        temperature=config.get("configurable", {}).get("temperature", 0.6),
        disable_streaming=True,
        tags=["summarizer"],
    )
    # summarize the messages
    summarization_result = summarize_messages(
        messages,
        running_summary=state.get("summary"),
        token_counter=summary_model.get_num_tokens_from_messages,
        model=summary_model,
        max_tokens=12000,
        max_tokens_before_summary=8000,
        max_summary_tokens=2000,
    )
    # initialize the actual model call
    model_kwargs = {
        "model": get_model_name(config),
        "temperature": config.get("configurable", {}).get("temperature", 0.6),
    }
    if "max_output_tokens" in config.get("configurable", {}):
        model_kwargs["max_tokens"] = config.get("configurable", {})["max_output_tokens"]
    model = init_chat_model(**model_kwargs)
    model_with_tools = model.bind_tools(tools)
    # prepend the configured system prompt to the (possibly summarized) history
    response = model_with_tools.invoke([system_prompt_message, *summarization_result.messages])
    state_update = {"messages": [response]}
    if summarization_result.running_summary:
        state_update["summary"] = summarization_result.running_summary
    return state_update
tool_node = ToolNode(tools=tools)
# Define the graph
graph = (
    StateGraph(state_schema=State, config_schema=GraphConfig)
    .add_node("model", call_model)
    .add_node("tools", tool_node)
    .add_edge(START, "model")
    .add_edge("tools", "model")
    .add_conditional_edges("model", tools_condition)
    .compile(name="Graph")
)
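For reference, the separate-node variant I mentioned above looked roughly like this (a sketch only; the node and key names are illustrative and summary_model would have to be constructed at module level). It isolates the summarizer call in its own node, but its LLM output still shows up in the messages stream:

class SummaryState(State):
    summarized_messages: list[BaseMessage]


def summarize_node(state: SummaryState, config: GraphConfig) -> dict:
    # Same summarization as in call_model, just isolated in its own node so the
    # summarizer's LLM call is at least attributable to a dedicated node.
    result = summarize_messages(
        state["messages"],
        running_summary=state.get("summary"),
        token_counter=summary_model.get_num_tokens_from_messages,
        model=summary_model,
        max_tokens=12000,
        max_tokens_before_summary=8000,
        max_summary_tokens=2000,
    )
    update: dict = {"summarized_messages": result.messages}
    if result.running_summary:
        update["summary"] = result.running_summary
    # call_model would then invoke the model with state["summarized_messages"]
    # instead of calling summarize_messages itself.
    return update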
Dependencies:
langsmith>=0.4.4
langgraph>=0.5.1
langgraph-cli>=0.3.3
langgraph-cli[inmem]>=0.3.3
langchain[openai]>=0.3.25
langchain[google-genai]>=0.3.25
langchain[anthropic]>=0.3.25
pytest>=8.4.0
requests>=2.32.4
PyJWT[crypto]>=2.8.0
langchain-groq>=0.3.6
langmem>=0.0.28
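For comparison, when I consume the graph directly in Python I can drop the summarizer's output using the metadata that comes with each streamed chunk, which is roughly what I'd like to be able to do through the React SDK. A minimal sketch (assuming the "summarizer" tag set above; as far as I can tell the metadata also carries the node name under "langgraph_node"):

# stream only chunks that were not produced by the tagged summarization model
for message_chunk, metadata in graph.stream(
    {"messages": [("user", "Hello!")]},
    stream_mode="messages",
):
    if "summarizer" in metadata.get("tags", []):
        continue  # hide the summarization model's output
    print(message_chunk.content, end="", flush=True)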
client side (heavily simplified):
const StreamContext = createContext<StreamContextType | undefined>(undefined);
const StreamSession = ({
  children,
  apiUrl,
  assistantId,
}: {
  children: ReactNode;
  apiUrl: string;
  assistantId: string;
}) => {
  const streamValue = useStream({
    apiUrl,
    assistantId,
    reconnectOnMount: true,
    threadId,
    initialValues,
    messagesKey: "messages",
    defaultHeaders: {
      Authorization: `Bearer ${token}`,
    },
  });
  return (
    <StreamContext.Provider value={streamValue}>
      {children}
    </StreamContext.Provider>
  );
};
export const StreamProvider: React.FC<{ children: ReactNode }> = ({
  children,
}) => {
  return (
    <StreamSession apiUrl={apiUrl} assistantId={assistantId}>
      {children}
    </StreamSession>
  );
};
// Create a custom hook to use the context
export const useStreamContext = (): StreamContextType => {
  const context = useContext(StreamContext);
  if (context === undefined) {
    throw new Error("useStreamContext must be used within a StreamProvider");
  }
  return context;
};
export default StreamContext;
export const Thread = function Thread({
  onClose,
  setMaxWidth = false,
}: ThreadProps) {
  const stream = useStreamContext();
  const messages = stream.messages;
  const isLoading = stream.isLoading;
  const handleSubmit = async (
    e: FormEvent,
    deviceId: string,
    orgId: string,
    userId: string
  ) => {
    e.preventDefault();
    // get input from form event
    const messageText = e.target.value;
    const newMessage: Message = {
      id: uuidv4(),
      type: "human",
      content: [{ type: "text", text: messageText }],
    };
    stream.submit(
      { messages: [...stream.messages, newMessage] },
      {
        metadata: {
          organization_id: orgId,
          device_id: deviceId,
          user_id: userId,
        },
        config: {
          configurable: {},
        },
        streamMode: ["messages"],
        // optimistically append only the new message; prev already holds the
        // existing thread messages
        optimisticValues: (prev) => ({
          ...prev,
          messages: [...(prev.messages ?? []), newMessage],
        }),
      }
    );
  };
  return (
    <StickToBottom>
      <StickyToBottomContent
        content={messages.map((message, index) =>
          message.type === "human" ? (
            <HumanMessage
              key={message.id || `${message.type}-${index}`}
              message={message}
              isLoading={isLoading}
              handleRegenerate={handleRegenerate}
            />
          ) : (
            <AssistantMessage
              key={message.id || `${message.type}-${index}`}
              message={message}
              isLoading={isLoading}
              handleRegenerate={handleRegenerate}
            />
          )
        )}
        footer={<ChatInput input={input} onSubmit={handleSubmit} />}
      />
    </StickToBottom>
  );
};
function ChatUI({
  onClose,
  setMaxWidth,
}: {
  onClose?: () => void;
  setMaxWidth?: boolean;
}) {
  return (
    <StreamProvider>
      <Thread onClose={onClose} />
    </StreamProvider>
  );
}
Dependencies:
"@langchain/core": "^0.3.66",
"@langchain/langgraph": "^0.3.11",
"@langchain/langgraph-sdk": "^0.0.100",