LangChain LLM chatbot: weird responses and cut-off output

Hi everyone, greetings from a student who is currently exploring LLM chatbots. I need some help from the LangChain community, some insights or guidance, as I have run out of ideas and can't figure out what is happening with my chatbot's generated responses. I will share my code here:

```python
import os
import re
import time
import streamlit as st
from io import BytesIO

import pdfplumber
from docx import Document as docxDoc
from pptx import Presentation
import openpyxl
import pandas as pd
import csv

from langchain_community.llms.gpt4all import GPT4All
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.documents import Document

# |=================HELPER FUNCTION===================|
def bot_reply(bot_chain, user_question, rag=None):
    input_dict = {
        "content": rag or "",
        "question": user_question
    }
    print(f"🚩Debug: {input_dict}")

    # Invoke the chain once, then simulate streaming by yielding the
    # accumulated text sentence by sentence
    accumulate = ''
    response = bot_chain.invoke(input_dict)
    print(f"🚩Debug - Received Response: {response}")

    sentences = re.split(r"(?<=[.!?]) +", response)
    for sentence in sentences:
        accumulate += sentence + " "
        yield accumulate
        time.sleep(0.05)

def make_chunks(corpus):
    # Wrap each extracted page in a Document so page numbers survive splitting
    docs = []
    for data in corpus:
        docs.append(Document(
            page_content=data['content'],
            metadata={
                "page": data['page']
            }
        ))

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50
    )
    chunks = text_splitter.split_documents(docs)
    return chunks

def pdf_extractor(uploaded_file):
    file_bytes = uploaded_file.read()
    pdf_stream = BytesIO(file_bytes)

    with pdfplumber.open(pdf_stream) as attachment:
        data = []
        for page in attachment.pages:
            pg_num = page.page_number
            content = page.extract_text() or ""
            if content:
                data.append({
                    "page": pg_num,
                    "content": content
                })
    return data

def docx_extractor(uploaded_file):
    doc = docxDoc(uploaded_file)
    data = []
    # .docx has no real page numbers, so use the paragraph index instead
    for incre, para in enumerate(doc.paragraphs, start=1):
        if para.text.strip():
            data.append({
                "page": incre,
                "content": para.text.strip()
            })
    return data

def pptx_extractor(uploaded_file):
    ppt = Presentation(uploaded_file)
    data = []
    for incre, slide in enumerate(ppt.slides, start=1):
        slide_text = []
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                slide_text.append(shape.text.strip())

        if slide_text:
            # Join the shape texts once per slide and collapse whitespace
            raw = " ".join(slide_text)
            clean = re.sub(r'\s+', ' ', raw).strip()
            data.append({
                "page": incre,
                "content": clean
            })

    return data

model = GPT4All(
    model="Llama-3.2-3B-Instruct-Q4_0.gguf",
    temp=0.75,
    top_p=1,
    verbose=False,
    max_tokens=2048,  # caps how many tokens each response may generate
    allow_download=False,
    device="cpu"
)

bot_template = SystemMessagePromptTemplate.from_template("""
    You are a personal AI assistant.
    Answer the question and keep your response clear and precise.
    If applicable, structure your response in point form.
""")

human_template = HumanMessagePromptTemplate.from_template("""
    {content}
    Question: {question}
""")

prompt = ChatPromptTemplate.from_messages([bot_template, human_template])

chain = prompt | model

# |=================TITAN UI===================|
st.header("Hello :orange[Yong Ming]!")
st.subheader("Start chatting with :orange[TITAN]!")

with st.sidebar:
    st.subheader("Continue to our previous chat")

# initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# preserve history when coming back or rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# use a distinct name so the input does not shadow the ChatPromptTemplate
if user_input := st.chat_input(
    placeholder="Write Message Here...",
    accept_file=True,
    file_type=['pdf', 'txt', 'docx', 'pptx', 'xlsx'],
):
    text = user_input.text or ""
    files = user_input.files or []

    with st.chat_message("human"):
        if text:
            st.markdown(text)
        if files:
            st.markdown(f"🧷**My Attachment:** {files[0].name}")

    st.session_state.messages.append({
        "role": "human",
        "content": text
    })

    rag_context = None

    if files:
        target_dir = 'C:/Users/ASUS/OneDrive/Desktop/documented_chatbot/attachments/'
        os.makedirs(target_dir, exist_ok=True)

        uploaded_file = files[0]
        saved_attachment = os.path.join(target_dir, uploaded_file.name)
        with open(saved_attachment, 'wb') as f:
            f.write(uploaded_file.getbuffer())

        # route to decide extractor
        if uploaded_file.name.endswith(".pdf"):
            data = pdf_extractor(uploaded_file)
        elif uploaded_file.name.endswith(".docx"):
            data = docx_extractor(uploaded_file)
        elif uploaded_file.name.endswith(".pptx"):
            data = pptx_extractor(uploaded_file)
        else:
            data = []

        chunks = make_chunks(data)
        rag_context = "\n\n".join([c.page_content for c in chunks])

    with st.chat_message("ai"):
        placeholder = st.empty()
        ai_msg = ""
        for chunk in bot_reply(chain, text, rag_context):
            placeholder.markdown(chunk)
            ai_msg = chunk

    st.session_state.messages.append({
        "role": "ai",
        "content": ai_msg
    })

    print(f"🚩Debug: {ai_msg}")
```

Most of the time the response seems to be cut off, and sometimes it starts with a weird opening, like in the picture shown below:

I hope to get a clearer explanation for this, as I assume something in my backend is causing it. Also, what should I take note of when dealing with responses being cut off?

I would really appreciate anyone who can explain or help. Thank you so much!

Can you elaborate on what you mean by the responses being cut off?

Hi, thanks for the reply.

The cut off here means that the response clearly still has words or sentences to continue, but it somehow stops at some point, leaving an incomplete paragraph.

Increase your max_tokens.
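
In the LangChain GPT4All wrapper, max_tokens caps how many tokens the model may generate for a single reply; when the cap is reached generation simply stops, which looks exactly like a mid-sentence cut-off. A minimal sketch of the change, reusing your constructor (4096 is just an illustrative value, tune it to your RAM and latency budget):

```python
from langchain_community.llms.gpt4all import GPT4All

model = GPT4All(
    model="Llama-3.2-3B-Instruct-Q4_0.gguf",
    temp=0.75,
    top_p=1,
    verbose=False,
    max_tokens=4096,  # raise the per-response generation cap (was 2048)
    allow_download=False,
    device="cpu"
)
```

Keep in mind that the prompt (including your RAG context) and the generated reply both have to fit in the model's context window, so very large documents can still lead to truncated output even with a higher cap.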