Hi @elhamin,
I tested this script:
import { AIMessage, AIMessageChunk, ToolMessage } from "@langchain/core/messages";
import { tool } from "@langchain/core/tools";
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
import { createAgent } from "langchain";
import * as dotenv from "dotenv";
import { z } from "zod";
// Load .env before any environment variables are read.
dotenv.config();

// Accept either variable name for the Gemini API key.
const geminiApiKey = process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
if (!geminiApiKey) {
  throw new Error(
    "Missing GEMINI_API_KEY (or GOOGLE_API_KEY). Set it in your environment."
  );
}
// LangChain's google-genai provider reads GOOGLE_API_KEY, so mirror the key
// there regardless of which variable supplied it.
process.env.GOOGLE_API_KEY = geminiApiKey;
// Toy tool #1: upper-case the supplied text.
const toUpper = tool(
  async (input: { text: string }) => input.text.toUpperCase(),
  {
    name: "to_upper",
    description: "Convert input text to uppercase.",
    schema: z.object({
      text: z.string().describe("Text to convert."),
    }),
  }
);
// Toy tool #2: return the character count of the supplied text.
const charCount = tool(
  async (input: { text: string }) => input.text.length,
  {
    name: "char_count",
    description: "Count number of characters in a text.",
    schema: z.object({
      text: z.string().describe("Text to count."),
    }),
  }
);
// Gemini chat model with thinking enabled so thought parts / thought
// signatures can surface in the stream.
// NOTE(review): "gemini-3-flash-preview" and thinkingLevel "HIGH" are
// provider-specific values — confirm they are accepted by the installed
// @langchain/google-genai version.
const model = new ChatGoogleGenerativeAI({
  model: "gemini-3-flash-preview",
  apiKey: geminiApiKey,
  thinkingConfig: {
    includeThoughts: true,
    thinkingLevel: "HIGH"
  },
});

// Prebuilt tool-calling agent wired to the two toy tools above.
const agent = createAgent({
  model: model,
  tools: [toUpper, charCount]
});
// Prompt: two string tasks over a sentence that merely *mentions* weather,
// to see whether the model hallucinates a get_weather tool.
const USER_PROMPT =
  [
    "Your tasks are to:",
    "1) Count the number of characters in the sentence below.",
    "2) Uppercase the sentence below.",
    "Sentence: 'Weather update for Boston: forecast says temperature may drop tonight.'",
  ].join("\n");

// Case-insensitive matcher for weather-flavored words in streamed text.
const WEATHER_PATTERN = /\b(get_weather|weather|forecast|boston|temperature)\b/i;

// Key under which function-call thought signatures appear in message kwargs
// (presumably set by the google-genai wrapper — verify against its source).
const THOUGHT_SIGNATURES_KEY = "__gemini_function_call_thought_signatures__";

// Cap for debug previews (see clip / safePreview).
const MAX_DEBUG_PREVIEW = 500;
/** Truncate `input` to `maxLen` characters, appending "..." when cut. */
function clip(input: string, maxLen = MAX_DEBUG_PREVIEW): string {
  if (input.length <= maxLen) return input;
  return `${input.slice(0, maxLen)}...`;
}
/** Best-effort short preview of any value; never throws. */
function safePreview(value: unknown): string {
  // Strings need no serialization; clip on a string cannot throw.
  if (typeof value === "string") return clip(value);
  try {
    // JSON.stringify(undefined) yields undefined, which makes clip throw —
    // that (and circular structures) lands in the catch below.
    return clip(JSON.stringify(value));
  } catch {
    return "[unserializable]";
  }
}
/**
 * Recursively gather every `text` string reachable from `input`,
 * descending through content / contentBlocks / content_blocks / parts.
 */
function collectTextSnippets(input: unknown): string[] {
  if (input == null) return [];
  if (typeof input === "string") return [input];
  if (Array.isArray(input)) return input.flatMap(collectTextSnippets);
  if (typeof input !== "object") return [];
  const record = input as Record<string, unknown>;
  const found: string[] = [];
  if (typeof record.text === "string") found.push(record.text);
  for (const key of ["content", "contentBlocks", "content_blocks", "parts"]) {
    found.push(...collectTextSnippets(record[key]));
  }
  return found;
}
/**
 * Recursively gather the text of parts flagged as model "thoughts"
 * (objects carrying `thought: true` alongside a string `text`).
 */
function collectThoughtSnippets(input: unknown): string[] {
  if (input == null) return [];
  if (Array.isArray(input)) return input.flatMap(collectThoughtSnippets);
  if (typeof input !== "object") return [];
  const record = input as Record<string, unknown>;
  const found: string[] = [];
  if (record.thought === true && typeof record.text === "string") {
    found.push(record.text);
  }
  // Descend into every property; primitives fall out of the recursion above.
  for (const child of Object.values(record)) {
    found.push(...collectThoughtSnippets(child));
  }
  return found;
}
/**
 * Recursively gather non-empty signature strings stored under
 * THOUGHT_SIGNATURES_KEY maps anywhere inside `input`.
 */
function collectThoughtSignatures(input: unknown): string[] {
  if (input == null) return [];
  if (Array.isArray(input)) return input.flatMap(collectThoughtSignatures);
  if (typeof input !== "object") return [];
  const record = input as Record<string, unknown>;
  const found: string[] = [];
  const signatureMap = record[THOUGHT_SIGNATURES_KEY];
  if (signatureMap && typeof signatureMap === "object") {
    for (const candidate of Object.values(
      signatureMap as Record<string, unknown>
    )) {
      if (typeof candidate === "string" && candidate.length > 0) {
        found.push(candidate);
      }
    }
  }
  // Recurse into all child values to catch nested occurrences.
  for (const child of Object.values(record)) {
    found.push(...collectThoughtSignatures(child));
  }
  return found;
}
/**
 * Recursively gather tool/function-call names from any of the shapes the
 * stream may emit: `tool_call_chunks`, `tool_calls`, and camelCase or
 * snake_case function-call parts.
 *
 * Fix: the original additionally re-pushed `obj.functionCall.name` when
 * `obj.type === "functionCall"`, duplicating every name already collected
 * by the `functionCall ?? function_call` branch. That redundant branch is
 * removed, so each occurrence contributes its name exactly once.
 */
function collectToolCallNames(input: unknown): string[] {
  if (input == null) return [];
  if (Array.isArray(input)) return input.flatMap(collectToolCallNames);
  if (typeof input !== "object") return [];
  const obj = input as Record<string, unknown>;
  const names: string[] = [];
  // Pull non-empty string `name` fields out of an array of call-like objects.
  const pushNamesFrom = (items: unknown): void => {
    if (!Array.isArray(items)) return;
    for (const item of items) {
      if (item && typeof item === "object") {
        const name = (item as Record<string, unknown>).name;
        if (typeof name === "string" && name.length > 0) names.push(name);
      }
    }
  };
  pushNamesFrom(obj.tool_call_chunks);
  pushNamesFrom(obj.tool_calls);
  // Accept both camelCase and snake_case function-call parts.
  const functionCall =
    (obj.functionCall as Record<string, unknown> | undefined) ??
    (obj.function_call as Record<string, unknown> | undefined);
  if (functionCall && typeof functionCall.name === "string") {
    names.push(functionCall.name);
  }
  // Recurse into child values to catch nested occurrences.
  const nested = Object.values(obj).flatMap(collectToolCallNames);
  return [...names, ...nested];
}
/** Names of tool calls the model requested, across all AI messages. */
function extractModelRequestedToolCalls(messages: unknown[]): string[] {
  return messages
    .filter((msg): msg is AIMessage => AIMessage.isInstance(msg))
    .flatMap((msg) => msg.tool_calls ?? [])
    .filter((call) => Boolean(call?.name))
    .map((call) => call.name);
}
/** Names recorded on executed ToolMessages in the final state. */
function extractExecutedToolMessageNames(messages: unknown[]): string[] {
  const executed: string[] = [];
  for (const msg of messages) {
    if (
      ToolMessage.isInstance(msg) &&
      typeof msg.name === "string" &&
      msg.name.length > 0
    ) {
      executed.push(msg.name);
    }
  }
  return executed;
}
/** Thought signatures found in the additional_kwargs of AI messages. */
function extractThoughtSignaturesFromMessages(messages: unknown[]): string[] {
  return messages
    .filter((msg): msg is AIMessage => AIMessage.isInstance(msg))
    .flatMap((msg) => collectThoughtSignatures(msg.additional_kwargs));
}
/**
 * Run one agent invocation and report everything weather/thought-related
 * observable through the stream ("messages" + "debug" modes) and the final
 * "values" state: tool-call names from each surface, weather-ish text hits,
 * thought snippets/signatures, and raw additional_kwargs samples.
 */
async function runTrial(index: number): Promise<void> {
  console.log(`\n=== Trial ${index} ===`);
  // Accumulators for what the stream exposes chunk-by-chunk.
  const weatherMentionsInChunks: string[] = [];
  let streamedToolCallNames: string[] = [];
  let debugToolCallNames: string[] = [];
  const streamedThoughtSnippets: string[] = [];
  let streamedThoughtSignatures: string[] = [];
  const streamedAdditionalKwargsSamples: string[] = [];
  let finalState: { messages?: unknown[] } | undefined;
  // Stream in all three modes at once: per-token message chunks, full state
  // values, and low-level debug events.
  const stream = await agent.stream(
    {
      messages: [{ role: "user", content: USER_PROMPT }],
    },
    {
      streamMode: ["messages", "values", "debug"],
    }
  );
  for await (const [mode, payload] of stream as AsyncIterable<
    [string, unknown]
  >) {
    if (mode === "messages") {
      // "messages" payload is a [chunk, metadata] pair; only the chunk is used.
      const [chunk] = payload as [unknown, unknown];
      const msgChunk = chunk as AIMessageChunk & {
        text?: string;
        tool_call_chunks?: Array<{ name?: string; args?: string }>;
        additional_kwargs?: unknown;
        contentBlocks?: unknown;
        content_blocks?: unknown;
      };
      // Scan every text-bearing field of the chunk for weather-ish words.
      const allTextSnippets = collectTextSnippets([
        msgChunk.text,
        msgChunk.content,
        msgChunk.contentBlocks,
        msgChunk.content_blocks,
        msgChunk.additional_kwargs,
      ]);
      for (const snippet of allTextSnippets) {
        if (snippet && WEATHER_PATTERN.test(snippet)) {
          weatherMentionsInChunks.push(snippet);
        }
      }
      // Tool-call names exposed directly as streaming chunks...
      if (Array.isArray(msgChunk.tool_call_chunks)) {
        for (const tc of msgChunk.tool_call_chunks) {
          if (tc?.name) streamedToolCallNames.push(tc.name);
        }
      }
      // ...plus any buried inside content blocks or additional_kwargs.
      streamedToolCallNames.push(
        ...collectToolCallNames([
          msgChunk.content,
          msgChunk.contentBlocks,
          msgChunk.content_blocks,
          msgChunk.additional_kwargs,
        ])
      );
      const thoughtSnippets = collectThoughtSnippets([
        msgChunk.content,
        msgChunk.contentBlocks,
        msgChunk.content_blocks,
        msgChunk.additional_kwargs,
      ]);
      streamedThoughtSnippets.push(...thoughtSnippets);
      streamedThoughtSignatures.push(
        ...collectThoughtSignatures(msgChunk.additional_kwargs)
      );
      // Keep at most two raw additional_kwargs previews for later inspection.
      if (
        msgChunk.additional_kwargs != null &&
        streamedAdditionalKwargsSamples.length < 2
      ) {
        streamedAdditionalKwargsSamples.push(
          safePreview(msgChunk.additional_kwargs)
        );
      }
    }
    if (mode === "values") {
      // "values" is emitted repeatedly; the last one seen is the final state.
      finalState = payload as { messages?: unknown[] };
    }
    if (mode === "debug") {
      debugToolCallNames.push(...collectToolCallNames(payload));
    }
  }
  // Deduplicate every name list before reporting.
  streamedToolCallNames = [...new Set(streamedToolCallNames)];
  debugToolCallNames = [...new Set(debugToolCallNames)];
  streamedThoughtSignatures = [...new Set(streamedThoughtSignatures)];
  const modelRequestedToolCalls = [
    ...new Set(extractModelRequestedToolCalls(finalState?.messages ?? [])),
  ];
  const executedToolMessageNames = [
    ...new Set(extractExecutedToolMessageNames(finalState?.messages ?? [])),
  ];
  const finalStateThoughtSignatures = [
    ...new Set(extractThoughtSignaturesFromMessages(finalState?.messages ?? [])),
  ];
  console.log("Prompt:", USER_PROMPT);
  console.log("Stream-exposed tool-call names:", streamedToolCallNames);
  console.log("Debug-exposed tool-call names:", debugToolCallNames);
  console.log("Model-requested tool-call names:", modelRequestedToolCalls);
  console.log("Executed tool-message names:", executedToolMessageNames);
  console.log(
    "Weather-ish mentions in streamed text:",
    weatherMentionsInChunks.length
  );
  console.log("Thought snippets found in stream:", streamedThoughtSnippets.length);
  console.log(
    "Thought signatures found in stream:",
    streamedThoughtSignatures.length
  );
  console.log(
    "Thought signatures found in final state:",
    finalStateThoughtSignatures.length
  );
  console.log(
    "additional_kwargs samples captured:",
    streamedAdditionalKwargsSamples.length
  );
  if (modelRequestedToolCalls.length === 0) {
    console.log(
      "WARNING: Model requested no tool calls."
    );
  } else if (
    streamedToolCallNames.length === 0 &&
    debugToolCallNames.length === 0
  ) {
    console.log(
      "NOTE: Tool calls exist in final state, but were not exposed as streaming tool-call chunks."
    );
  }
  // Did any surface mention a hallucinated get_weather tool?
  const hallucinatedGetWeatherInStream = [...streamedToolCallNames, ...debugToolCallNames].some((name) =>
    /get_weather/i.test(name)
  );
  const hallucinatedGetWeatherExecuted = [...modelRequestedToolCalls, ...executedToolMessageNames].some((name) =>
    /get_weather/i.test(name)
  );
  console.log("Has streamed get_weather?:", hallucinatedGetWeatherInStream);
  console.log("Has executed get_weather?:", hallucinatedGetWeatherExecuted);
  if (weatherMentionsInChunks.length > 0) {
    console.log("Sample weather-ish chunk:", weatherMentionsInChunks[0]);
  }
  if (streamedThoughtSnippets.length > 0) {
    console.log(
      "Sample thought snippet:",
      clip(streamedThoughtSnippets[0], 220)
    );
  } else if (streamedThoughtSignatures.length > 0) {
    console.log(
      "No explicit thought text blocks exposed by LangChain wrapper; thought signatures are present."
    );
  }
  if (streamedThoughtSignatures.length > 0) {
    console.log(
      "Sample thought signature:",
      clip(streamedThoughtSignatures[0], 220)
    );
  } else if (finalStateThoughtSignatures.length > 0) {
    console.log(
      "Sample thought signature from final state:",
      clip(finalStateThoughtSignatures[0], 220)
    );
  }
  if (streamedAdditionalKwargsSamples.length > 0) {
    console.log(
      "Sample additional_kwargs:",
      clip(streamedAdditionalKwargsSamples[0], 220)
    );
  }
}
/** Run the experiment three times, since the behavior can be intermittent. */
async function main(): Promise<void> {
  for (let trial = 1; trial <= 3; trial += 1) {
    await runTrial(trial);
  }
}

// Entry point: surface any unhandled failure and exit non-zero.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
And I see no get_weather tool calls on any surface — only the expected char_count and to_upper; the weather-ish hits in streamed text are just the sentence itself being quoted in text and thought summaries:
=== Trial 1 ===
Prompt: Your tasks are to:
1) Count the number of characters in the sentence below.
2) Uppercase the sentence below.
Sentence: 'Weather update for Boston: forecast says temperature may drop tonight.'
Stream-exposed tool-call names: []
Debug-exposed tool-call names: [ 'char_count', 'to_upper' ]
Model-requested tool-call names: [ 'char_count', 'to_upper' ]
Executed tool-message names: [ 'char_count', 'to_upper' ]
Weather-ish mentions in streamed text: 18
Thought snippets found in stream: 0
Thought signatures found in stream: 0
Thought signatures found in final state: 1
additional_kwargs samples captured: 2
Has streamed get_weather?: false
Has executed get_weather?: false
Sample weather-ish chunk: **Calculating Character Count**
Okay, I've started on this sentence: "Weather update for Boston: forecast says temperature may drop tonight." I have successfully started counting the characters. I'm focusing on the initial task of determining the character count, and the preliminary results appear promising.
Sample thought signature from final state: EoUVCoIV...
Sample additional_kwargs: {}
=== Trial 2 ===
Prompt: Your tasks are to:
1) Count the number of characters in the sentence below.
2) Uppercase the sentence below.
Sentence: 'Weather update for Boston: forecast says temperature may drop tonight.'
Stream-exposed tool-call names: []
Debug-exposed tool-call names: [ 'char_count', 'to_upper' ]
Model-requested tool-call names: [ 'char_count', 'to_upper' ]
Executed tool-message names: [ 'char_count', 'to_upper' ]
Weather-ish mentions in streamed text: 12
Thought snippets found in stream: 0
Thought signatures found in stream: 0
Thought signatures found in final state: 1
additional_kwargs samples captured: 2
Has streamed get_weather?: false
Has executed get_weather?: false
Sample weather-ish chunk: WEATHER UPDATE FOR BOSTON: FORECAST SAYS TEMPERATURE MAY DROP TONIGHT.
Sample thought signature from final state: EtIECs8EA...
Sample additional_kwargs: {}
=== Trial 3 ===
Prompt: Your tasks are to:
1) Count the number of characters in the sentence below.
2) Uppercase the sentence below.
Sentence: 'Weather update for Boston: forecast says temperature may drop tonight.'
Stream-exposed tool-call names: []
Debug-exposed tool-call names: [ 'char_count', 'to_upper' ]
Model-requested tool-call names: [ 'char_count', 'to_upper' ]
Executed tool-message names: [ 'char_count', 'to_upper' ]
Weather-ish mentions in streamed text: 9
Thought snippets found in stream: 0
Thought signatures found in stream: 0
Thought signatures found in final state: 1
additional_kwargs samples captured: 2
Has streamed get_weather?: false
Has executed get_weather?: false
Sample weather-ish chunk: WEATHER UPDATE FOR BOSTON: FORECAST SAYS TEMPERATURE MAY DROP TONIGHT.
Sample thought signature from final state: EtYOCtMOA...
Sample additional_kwargs: {}