Bot Lagging In with_structured_output

    def grade_answer(self, state: WorkflowState, config) -> WorkflowState:
        """Grade the current answer and determine if it's sufficient"""
        print("🎯 Answer Grading Agent: Evaluating answer quality...")
        
        query = state["messages"][-1].content
        # refined_query = state.get("extracted_questions", query)
        answer = state.get("current_answer", "")
        contexts = state.get("current_contexts", [])
        keywords = state.get("keywords", [])
        research_agent = state.get("current_research_agent", "unknown")
        branch_name = state.get("branch_name", "PharmCare")
        
        # Debug logging
        print(f"πŸ“ User Question: {query[:100] if query else 'EMPTY'}...")
        print(f"πŸ’¬ Current Answer: {answer[:100] if answer else 'EMPTY'}...")
        print(f"πŸ“š Contexts count: {len(contexts)}")
        if contexts:
            print(f"πŸ“„ Sample context: {str(contexts[0])[:200] if contexts else 'NONE'}...")
        
        
        weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday","Sunday"]
        timezone = pytz.timezone("Asia/Hong_Kong")
        date = datetime.now(timezone)
        weekday = weekdays[date.weekday()]
        time = f"{date.strftime('%m/%d/%Y %H:%M:%S')} {weekday}"
        
        metadata = config.get('metadata', {})
        bot_name = metadata.get('bot_name', 'Assistant')
        language = metadata.get('language', 'zh-TW')
        branch_id = metadata.get('branch_id', '1')
        database_name = metadata.get('database_name', 'PHARMCARE_DEMO')
        
        generated_by = state.get("generated_by", "unknown")

        if generated_by == "unanswered":
            print("πŸ”– Answer generated from handover document, skipping grading.")
            return {
                "grading_result": {
                    "score": 1.0,
                    "quality": "high",
                    "sufficient": True,
                    "feedback": "Answer generated from handover document, no grading needed."
                }
            }
        
        if answer == "NO_ANSWER_FOUND":
            return {
                "grading_result": {
                    "score": 0.0,
                    "quality": "low",
                    "sufficient": False,
                    "feedback": "No answer found."
                }
            }

        # Use with_structured_output for robust parsing
        llm_with_structure = self.llm.with_structured_output(
            GradingResult,
            method="json_schema",  # Use native JSON schema support
            include_raw=True  # Get both parsed and raw output for error handling
        )
        
        ans_grading_system_template = self.prompt_manager.get_prompt(
            PromptType.QUANTITATIVE_GRADING, 
            branch_id, 
            database_name
        )
        
        ans_grading_human_template = """
            User Question: {question}
            Answer from {research_source}: {answer}
            Domain Knowledge Context: {context}           
            Keywords: {keywords}
            Current time is {time}
            The Domain/Entity/System is {branch_name} and the bot going to answer is {bot_name}.
            
            Return a JSON object with the following structure:
            - score: float (0.0 to 1.0)
            - quality: string ("low", "medium", or "high")
            - sufficient: boolean
            - feedback: string (detailed feedback)
            """
        
        ans_chat_prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(ans_grading_system_template),
            HumanMessagePromptTemplate.from_template(ans_grading_human_template)
        ])
        
        ans_chat = ans_chat_prompt.format_messages(
            question=query,
            answer=answer,
            research_source=research_agent,
            context=format_docs(contexts),
            keywords=", ".join(keywords),
            time=time,
            branch_name=branch_name,
            bot_name=bot_name
        )
        
        try:
            # Invoke with structured output
            response = llm_with_structure.invoke(ans_chat)
            
            # Check if parsing was successful
            if response.get("parsing_error"):
                print(f"⚠️ Parsing failed: {response['parsing_error']}")
                # Fallback to default values
                grading_result = {
                    "score": 0.5,
                    "quality": "medium",
                    "sufficient": False,
                    "feedback": f"Parsing error occurred: {response['parsing_error']}"
                }
            else:
                # Successfully parsed
                parsed_resp = response["parsed"]
                grading_result = self._parse_grading_response(parsed_resp)
                
        except Exception as e:
            print(f"❌ Grading failed completely: {e}")
            # Ultimate fallback
            grading_result = {
                "score": 0.0,
                "quality": "low",
                "sufficient": False,
                "feedback": f"Grading error: {str(e)}"
            }

        print(f"πŸ“Š Answer graded: Score={grading_result['score']:.2f}, Quality={grading_result['quality']}, Sufficient={grading_result['sufficient']}")
        
        return {
            "grading_result": grading_result
        }
    
    def _parse_grading_response(self, response: GradingResult) -> dict:
        """Parse the grading response into structured format"""
        return {
            "score": response.score,
            "quality": response.quality,
            "sufficient": response.sufficient,
            "feedback": response.feedback
        }

I have found that the agent sometimes hangs for a long time in the grade_answer node, but I have no idea why the grade_answer part hangs without giving any output from the node.


I have also found that there is no output from ChatVertexAI.
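
To check whether the Vertex AI call ever returns, I am turning on LangChain's global debug tracing before running the graph. A minimal sketch, assuming a recent LangChain where langchain.globals is available:

    from langchain.globals import set_debug

    # Log every LLM request/response so the console shows whether the
    # Vertex AI call is actually sent and whether anything ever comes back.
    set_debug(True)

    # ...then run the workflow as usual and watch the grade_answer node logs.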

I really want to help you. Could you provide more code?

hi @charlieckh

Could you temporarily disable include_raw and see the result?
include_raw=False
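
Roughly like this. Note that with include_raw=False the runnable should return the parsed GradingResult directly (a parsing failure should raise rather than show up as a parsing_error key), so the error handling around the call changes a bit; a sketch based on your snippet:

    llm_with_structure = self.llm.with_structured_output(
        GradingResult,
        method="json_schema",
        include_raw=False  # returns the parsed GradingResult directly
    )

    try:
        parsed_resp = llm_with_structure.invoke(ans_chat)  # GradingResult, not a dict
        grading_result = self._parse_grading_response(parsed_resp)
    except Exception as e:
        # without include_raw there is no "parsing_error" key; failures raise here
        grading_result = {
            "score": 0.5,
            "quality": "medium",
            "sufficient": False,
            "feedback": f"Parsing error occurred: {e}"
        }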

I found that this error doesn't occur every time. It sometimes happens and sometimes doesn't when include_raw = True. For include_raw = False, I am now observing whether the error occurs as well.

@pawel-twardziak I found that it also occurs even when include_raw = False.
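
As a stopgap I am wrapping the structured-output call in an explicit timeout so grade_answer falls back instead of blocking the whole graph. A rough sketch (the 60-second limit is an arbitrary value I picked, and it only abandons the stuck call rather than cancelling the underlying request):

    import concurrent.futures

    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(llm_with_structure.invoke, ans_chat)
    try:
        response = future.result(timeout=60)
    except concurrent.futures.TimeoutError:
        print("⏱️ Grading call timed out, using fallback grade")
        response = None  # handled by the existing fallback grading_result branch
    finally:
        # don't block on the (possibly stuck) worker thread
        executor.shutdown(wait=False)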

ok, I see.
What is the exact model you are using?

I am currently using Gemini-2.5-flash-lite

Then I think that might be a problem with the model itself… :thinking:
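
If you get a chance, it might be worth running the same structured call against a different Gemini model to see whether only Gemini-2.5-flash-lite stalls. A rough sketch (constructor kwargs are from memory and may differ between langchain-google-vertexai versions):

    from langchain_google_vertexai import ChatVertexAI

    # Same structured-output setup, different model, to isolate the hang.
    test_llm = ChatVertexAI(model_name="gemini-2.5-flash", temperature=0)
    test_runnable = test_llm.with_structured_output(GradingResult, method="json_schema")

    print(test_runnable.invoke("Grade this answer: 'Paris is the capital of France.'"))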