@tool
def search_web(query: str, config: RunnableConfig | None = None):
"""
This tool performs a web search using an external search API to retrieve relevant information based on a user's query.
Parameters:
query: (string) The user's search query to find relevant web information.
Return: A list of up to 5 web search results, each containing a title, snippet, and URL.
"""
try:
flag = 1 if config else 0
print(f"if config: {flag}")
if config:
metadata = config.get('metadata', {})
search_domains = metadata.get('search_domain', '')
else:
search_domains = ''
print(f"🔍 No specific search domains provided in config: {config}")
print(f"🔍 Performing web search for query: {query} with domains: {search_domains}")
docs = []
if search_domains:
if "," in search_domains:
search_domains = [d.strip() for d in search_domains.split(",")]
else:
search_domains = [search_domains.strip()]
tavily_search = TavilySearch(
max_results=3,
search_depth="basic",
include_answer=False,
include_raw_content=False,
include_domains=search_domains
)
search_results = tavily_search.invoke({"query": query})
results = search_results.get("results", [])
for result in results:
content = result.get("content", "")
content = content
url = result.get("url", "")
docs.append(f"Web Search Result: {content} URL: {url} \n")
return docs
except Exception as e:
print(f"❌ Error in web search: {e}")
return []
@tool
def search_vectorstore(query:str, config: RunnableConfig = None):
"""
This tool retrieves relevant documents from a vector store based on a user's query.
Parameters:
query: (string) The user's search query to find matching documents.
Return: A list of up to 7 documents that match the query.
"""
start_time = time.time()
# Get database and branch info from config
flag = 1 if config else 0
print(f"if config: {flag}")
if config:
if config.get('metadata'):
metadata = config.get('metadata', {})
database_name = metadata.get('database_name', 'PHARMCARE_HIA')
branch_id = metadata.get('branch_id', '1')
elif config.get("configurable"):
configurable = config.get("configurable", {})
database_name = configurable.get('database_name', 'PHARMCARE_HIA')
branch_id = configurable.get('branch_id', '1')
else:
database_name = 'PHARMCARE_HIA'
branch_id = '1'
collection_name = f"{database_name}-{branch_id}"
print(f"🔍 Searching all documents in collection: {collection_name}")
try:
# Use cached clients from connection pools
genai_client = get_cached_genai_client("pharmcare-chatbot-429003", "us-central1")
qdrant_client = get_cached_qdrant_client()
embedding = VertexAIEmbeddings(model_name="gemini-embedding-001", project="pharmcare-chatbot-429003")
if not genai_client:
print("❌ Failed to get GenAI client from pool")
return []
if not qdrant_client:
print("❌ Failed to get Qdrant client from pool")
return []
# Generate embedding using cached GenAI client
embedding_start = time.time()
resp = embedding.embed(texts=[query], dimensions=768, embeddings_task_type="RETRIEVAL_QUERY")
embedding_time = time.time() - embedding_start
query_embedding = resp[0]
if not query_embedding:
print("❌ Failed to get embedding for query")
return []
# Search using cached Qdrant client - EXCLUDE Handover documents and empty content
search_start = time.time()
# Create filter to exclude Handover category (system documents) and empty page_content
filter_conditions = models.Filter(
must=[
models.FieldCondition(
key="page_content",
match=models.MatchExcept(**{"except": ["", "None", "none", "null"]})
)
],
)
search_result = qdrant_client.search(
collection_name=collection_name,
query_vector=query_embedding,
with_payload=True,
limit=7, # Retrieve results excluding Handover
query_filter=filter_conditions # Apply exclusion filter
)
search_time = time.time() - search_start
# Format results with category information
docs = []
for point in search_result:
metadata = point.payload.get('metadata', {})
category = metadata.get('category', '')
source = metadata.get('source', 'N/A')
content = point.payload.get('page_content', '')
# Skip documents with empty or None content
if not content or content.strip() == '' or content.lower() == 'none':
print(f"⚠️ Skipping document with empty content: category={category}, source={source}")
continue
# Skip Handover documents (additional safety check)
# if category == 'Handover':
# print(f"⚠️ Skipping Handover document: {source}")
# continue
# Format based on category
if category == 'template':
question = metadata.get('question', 'N/A')
docs.append(f"[Template] Question: {question}\nContent: {content}")
elif category == 'QnA':
question = metadata.get('question', 'N/A')
docs.append(f"[QnA] Question: {question}\nAnswer: {content}")
elif category == 'Handover':
question = metadata.get('question', 'N/A')
docs.append(f"[QnA] Question: {question}\nAnswer: {content}")
else:
docs.append(f"[{category}] Source: {source}\nContent: {content}")
total_time = time.time() - start_time
print(f"âś… Retrieved {len(docs)} documents in {total_time:.3f}s (embedding: {embedding_time:.3f}s, search: {search_time:.3f}s)")
# Debug: Print first result if available
# if docs:
# print(f"đź“„ Sample result: {docs[0][:200]}...")
# else:
# print("⚠️ No valid documents found after filtering")
except Exception as e:
print(f"❌ Error in vectorstore search: {e}")
docs = []
return docs
@tool
def search_all_documents(query: str, config: RunnableConfig = None):
"""
This tool retrieves relevant documents from a vector store based on a user's query.
Uses connection pools for optimal performance across multiple invocations.
Parameters:
query: (string) The user's search query to find matching documents.
Return: A list of up to 7 documents that match the query across all user-facing categories.
Constraint: Excludes system documents (Handover category) and documents with empty content.
Searches templates, QnA, KM, web_crawl, and other user-facing content.
"""
parallel_runnables = RunnableParallel({
"km_search": RunnableLambda(lambda _: search_vectorstore.invoke({"query": query}, config=config)),
"online_search": RunnableLambda(lambda _: search_web.invoke({"query": query}, config=config)),
})
results = parallel_runnables.invoke({})
km_search = results.get("km_search", "")
online_search = results.get("online_search", "")
return km_search + online_search
I am running the above code via the agent system in langgraph. The agent is created by create_agent using llm, tools and prompts. However, i found that the config flag is 1 on only search_vectorstore but 0 on search_web. I have tried using RunnableLambda but the result is still the same.