I’m trying to stop LangGraph Platform from streaming the output of every model to the client, with no luck so far. In between the updates from node outputs, I’m also getting token-level chunks from each individual LLM call.
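For reference, this is the kind of interleaving I mean. A simplified sketch of streaming a compiled graph with both modes enabled (graph here stands in for my compiled graph, and the input is a placeholder):

# Simplified sketch: with a list of stream modes, LangGraph yields
# (mode, chunk) tuples. Node outputs arrive under "updates", while the
# token-level chunks arrive under "messages".
for mode, chunk in graph.stream(
    {"messages": [("user", "hi")]},
    stream_mode=["updates", "messages"],
):
    print(mode, type(chunk))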
On the client side I’m using stream = useStream(...) and rendering stream.values.messages.
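I assume the hook subscribes to a token-level stream mode (something like "messages-tuple") in addition to "values". For illustration only, a rough equivalent with the Python SDK (the URL, assistant name, and stream modes are placeholders/guesses, not my actual setup):

from langgraph_sdk import get_client

client = get_client(url="http://localhost:2024")  # placeholder deployment URL

async def dump_events() -> None:
    # Stateless run purely for illustration; "messages-tuple" is my guess at
    # the mode behind the per-token chunks I'm seeing.
    async for chunk in client.runs.stream(
        None,      # thread_id; None means a stateless run
        "agent",   # assistant / graph name, placeholder
        input={"messages": [{"role": "user", "content": "hi"}]},
        stream_mode=["values", "messages-tuple"],
    ):
        print(chunk.event)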
On the server side my models are loaded with the code below. Note "disable_streaming": True and .with_config(tags=["nostream"]).
import os
from typing import Optional

from langchain_core.language_models import BaseChatModel
from langchain_core.utils.utils import secret_from_env
from langchain_openai import ChatOpenAI
from pydantic import Field, SecretStr

# RATE_LIMITER and LLM_TIMEOUT are defined elsewhere in my codebase.


class ChatOpenRouter(ChatOpenAI):
    """ChatOpenAI subclass pointed at the OpenRouter API."""

    openai_api_key: Optional[SecretStr] = Field(
        alias="api_key",
        default_factory=secret_from_env("OPENROUTER_API_KEY"),
    )

    @property
    def lc_secrets(self) -> dict:
        return {"openai_api_key": "OPENROUTER_API_KEY"}

    def __init__(self, openai_api_key=None, **kwargs):
        key = openai_api_key or os.getenv("OPENROUTER_API_KEY")
        super().__init__(
            base_url="https://openrouter.ai/api/v1",
            openai_api_key=key,
            **kwargs,
        )


def load_chat_model(fully_specified_name: str, name: Optional[str] = None) -> BaseChatModel:
    """Load a chat model from a fully specified name.

    Args:
        fully_specified_name (str): String in the format 'provider/model'.
        name (str, optional): Name for the model instance.
    """
    params = {
        "model": fully_specified_name,
        "rate_limiter": RATE_LIMITER,
        "timeout": LLM_TIMEOUT,
        "disable_streaming": True,
    }
    if name:
        params["name"] = name
    if fully_specified_name != "gpt-4o-search-preview":
        params["temperature"] = 1.0
    if fully_specified_name in {
        "o4-mini-high",
        "o4-mini",
        "google/gemini-2.5-flash",
        "google/gemini-2.5-pro",
        "anthropic/claude-sonnet-4",
    }:
        return ChatOpenRouter(**params).with_config(tags=["nostream"])
    else:
        return ChatOpenAI(**params).with_config(tags=["nostream"])
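For reference, a typical node that uses one of these models looks roughly like this (simplified sketch; the state schema, node name, and model name are placeholders, not my exact node code):

from langgraph.graph import MessagesState

def example_node(state: MessagesState):
    # Plain .invoke() on the loaded model; the node itself never calls .stream().
    model = load_chat_model("google/gemini-2.5-flash", name="summarizer")
    return {"messages": [model.invoke(state["messages"])]}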
I’d appreciate suggestions for both client-side and server-side fixes, with server-side fixes preferred.