def grade_answer(self, state: WorkflowState, config) -> WorkflowState:
    """Grade the current answer and determine if it's sufficient"""
    print("🎯 Answer Grading Agent: Evaluating answer quality...")
    query = state["messages"][-1].content
    # refined_query = state.get("extracted_questions", query)
    answer = state.get("current_answer", "")
    contexts = state.get("current_contexts", [])
    keywords = state.get("keywords", [])
    research_agent = state.get("current_research_agent", "unknown")
    branch_name = state.get("branch_name", "PharmCare")

    # Debug logging
    print(f"User Question: {query[:100] if query else 'EMPTY'}...")
    print(f"Current Answer: {answer[:100] if answer else 'EMPTY'}...")
    print(f"Contexts count: {len(contexts)}")
    if contexts:
        print(f"Sample context: {str(contexts[0])[:200] if contexts else 'NONE'}...")

    weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    timezone = pytz.timezone("Asia/Hong_Kong")
    date = datetime.now(timezone)
    weekday = weekdays[date.weekday()]
    time = f"{date.strftime('%m/%d/%Y %H:%M:%S')} {weekday}"

    metadata = config.get('metadata')
    bot_name = metadata.get('bot_name', 'Assistant')
    language = metadata.get('language', 'zh-TW')
    branch_id = metadata.get('branch_id', '1')
    database_name = metadata.get('database_name', 'PHARMCARE_DEMO')

    generated_by = state.get("generated_by", "unknown")
    if generated_by == "unanswered":
        print("Answer generated from handover document, skipping grading.")
        return {
            "grading_result": {
                "score": 1.0,
                "quality": "high",
                "sufficient": True,
                "feedback": "Answer generated from handover document, no grading needed."
            }
        }

    if answer == "NO_ANSWER_FOUND":
        return {
            "grading_result": {
                "score": 0.0,
                "quality": "low",
                "sufficient": False,
                "feedback": "No answer found."
            }
        }

    # Use with_structured_output for robust parsing
    llm_with_structure = self.llm.with_structured_output(
        GradingResult,
        method="json_schema",  # Use native JSON schema support
        include_raw=True       # Get both parsed and raw output for error handling
    )

    ans_grading_system_template = self.prompt_manager.get_prompt(
        PromptType.QUANTITATIVE_GRADING,
        branch_id,
        database_name
    )
    ans_grading_human_template = """
    User Question: {question}
    Answer from {research_source}: {answer}
    Domain Knowledge Context: {context}
    Keywords: {keywords}
    Current time is {time}
    The Domain/Entity/System is {branch_name} and the bot going to answer is {bot_name}.
    Return a JSON object with the following structure:
    - score: float (0.0 to 1.0)
    - quality: string ("low", "medium", or "high")
    - sufficient: boolean
    - feedback: string (detailed feedback)
    """

    ans_chat_prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(ans_grading_system_template),
        HumanMessagePromptTemplate.from_template(ans_grading_human_template)
    ])
    ans_chat = ans_chat_prompt.format_messages(
        question=query,
        answer=answer,
        research_source=research_agent,
        context=format_docs(contexts),
        keywords=", ".join(keywords),
        time=time,
        branch_name=branch_name,
        bot_name=bot_name
    )

    try:
        # Invoke with structured output
        response = llm_with_structure.invoke(ans_chat)

        # Check if parsing was successful
        if response.get("parsing_error"):
            print(f"⚠️ Parsing failed: {response['parsing_error']}")
            # Fallback to default values
            grading_result = {
                "score": 0.5,
                "quality": "medium",
                "sufficient": False,
                "feedback": f"Parsing error occurred: {response['parsing_error']}"
            }
        else:
            # Successfully parsed
            parsed_resp = response["parsed"]
            grading_result = self._parse_grading_response(parsed_resp)
    except Exception as e:
        print(f"❌ Grading failed completely: {e}")
        # Ultimate fallback
        grading_result = {
            "score": 0.0,
            "quality": "low",
            "sufficient": False,
            "feedback": f"Grading error: {str(e)}"
        }

    print(f"Answer graded: Score={grading_result['score']:.2f}, Quality={grading_result['quality']}, Sufficient={grading_result['sufficient']}")
    return {
        "grading_result": grading_result
    }

def _parse_grading_response(self, response: GradingResult) -> dict:
    """Parse the grading response into structured format"""
    score = 0.0
    quality = "low"
    sufficient = False
    feedback = "No feedback provided"
    score = response.score
    quality = response.quality
    sufficient = response.sufficient
    feedback = response.feedback
    # for line in lines:
    #     line = line.strip()
    #     if line.startswith("SCORE:"):
    #         try:
    #             score = float(line.split(":", 1)[1].strip())
    #         except:
    #             score = 0.0
    #     elif line.startswith("QUALITY:"):
    #         quality = line.split(":", 1)[1].strip().lower()
    #     elif line.startswith("SUFFICIENT:"):
    #         sufficient = line.split(":", 1)[1].strip().lower() == "true"
    #     elif line.startswith("FEEDBACK:"):
    #         feedback = line.split(":", 1)[1].strip()
    return {
        "score": score,
        "quality": quality,
        "sufficient": sufficient,
        "feedback": feedback
    }
I have found that the agent sometimes hangs for a long time in the grade_answer node, but I have no idea why grade_answer hangs without producing any output from the node.
I have also found that there is no output from ChatVertexAI.
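In case it helps with diagnosing this, here is a minimal sketch (not my actual code, and it only assumes llm_with_structure and ans_chat as defined above) that bounds the structured-output call with a hard timeout, so the node logs a timeout and falls back instead of blocking indefinitely:

from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeout

def invoke_with_timeout(runnable, messages, timeout_s=60):
    """Run runnable.invoke(messages) in a worker thread; stop waiting after timeout_s seconds."""
    pool = ThreadPoolExecutor(max_workers=1)
    future = pool.submit(runnable.invoke, messages)
    try:
        return future.result(timeout=timeout_s)
    except FutureTimeout:
        print(f"Grading LLM call did not return within {timeout_s}s")
        return None
    finally:
        # Don't block on a worker thread that may still be stuck in the Vertex AI call.
        pool.shutdown(wait=False)

response = invoke_with_timeout(llm_with_structure, ans_chat, timeout_s=60)
if response is None:
    grading_result = {
        "score": 0.0,
        "quality": "low",
        "sufficient": False,
        "feedback": "Grading timed out before ChatVertexAI returned."
    }

With something like this in place the node at least surfaces when the ChatVertexAI call is the part that never returns; turning on LangChain's global debug logging (set_debug(True) from langchain_core.globals) should also show whether the request is actually being issued and what, if anything, comes back.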


