I am attempting to build a LangChain agent that uses Playwright to interact with websites. Because every website has different, dynamic requirements, there is no fixed structure for which tools should be called in which order; the agent should be able to navigate through a website intelligently. I have successfully gotten the agent to visit specific URLs, but I am running into issues getting it to call more than one tool. Playwright ends up aborting the operation, and I am fairly sure this is because a second page function call starts while the first one is still being processed, which aborts the first. I am unsure how to let LangChain call tools in a flexible order while also ensuring it does not execute multiple tools that interfere with each other.
Are there any suggestions on the best way to go about this? I have considered using LangGraph or making the code synchronous, but neither really worked out. I am also new to this and could have missed something. Any ideas or input would be appreciated.
My current code is below:
import asyncio, os
from dotenv import load_dotenv
from playwright.async_api import Page, async_playwright, Playwright
from langchain.agents import create_agent
from langchain_ollama import ChatOllama
from langchain.tools import tool
class PlaywrightAgent:
    """LangChain agent that drives a single Playwright page through tools.

    The agent exposes ``visit_url`` and ``go_back`` tools to the model. Both
    operate on the same ``Page``, so every page operation is serialized with
    an ``asyncio.Lock``: if the model requests several tool calls in one
    response and the agent runtime executes them concurrently, a second
    navigation would otherwise interrupt the first and Playwright would
    abort it.
    """

    page: Page
    # Value of OLLAMA_BASE_URL; os.getenv returns None when it is unset.
    base_url: str

    def __init__(self, page: Page) -> None:
        """Store the page, create the page lock, and read the Ollama URL.

        Args:
            page: The Playwright page that all tools operate on.
        """
        self.page = page
        # One lock per page: only one Playwright operation may be in flight
        # on self.page at any time (see class docstring).
        self._page_lock = asyncio.Lock()
        load_dotenv()
        self.base_url = os.getenv("OLLAMA_BASE_URL")

    @staticmethod
    async def create_playwright_agent(playwright: Playwright) -> "PlaywrightAgent":
        """Launch a headed Chromium browser and build an agent on a new page.

        Args:
            playwright: A started Playwright instance.

        Returns:
            A ``PlaywrightAgent`` bound to a freshly opened page.
        """
        chromium = playwright.chromium
        browser = await chromium.launch(headless=False)
        page = await browser.new_page()
        return PlaywrightAgent(page=page)

    async def _visit_url(self, url: str) -> str:
        """Navigate the page to ``url`` and return the URL it ended up on.

        The page lock is held for the whole navigation so that a concurrent
        tool call cannot start another navigation and abort this one.
        """
        async with self._page_lock:
            await self.page.goto(url=url, wait_until="domcontentloaded", timeout=15000)
            return self.page.url

    async def _go_back(self) -> None:
        """Go back one entry in the page history, holding the page lock."""
        async with self._page_lock:
            await self.page.go_back()

    def get_tools(self):
        """Build the LangChain tools that wrap this agent's page operations.

        The tools are closures over ``self`` so the model-facing signatures
        contain only the arguments the model should supply. Their docstrings
        are kept verbatim because ``@tool`` presumably uses them as the tool
        descriptions shown to the model (NOTE(review): confirm against the
        LangChain tools documentation).

        Returns:
            A list containing the ``visit_url`` and ``go_back`` tools.
        """

        @tool
        async def visit_url(url: str) -> str:
            """
            Visit a specified URL.
            Args:
            url (str): The URL to visit.
            Returns:
            The current url the Page is on.
            """
            return await self._visit_url(url)

        @tool
        async def go_back() -> None:
            """
            Go back to the previous page in the browser history.
            Args:
            None
            Returns:
            None
            """
            return await self._go_back()

        return [visit_url, go_back]

    async def run(self):
        """Create the Ollama-backed agent and stream one hard-coded task.

        Every streamed debug event is printed as it arrives.
        """
        tools = self.get_tools()
        model = ChatOllama(
            model="llama3.1",
            temperature=0.1,
            max_tokens=4096,
            timeout=30,
            base_url=self.base_url,
        )
        agent = create_agent(
            model=model,
            tools=tools,
            system_prompt="You are a web browsing agent meant to use your Playwright tools to interact with web pages."
        )
        async for event in agent.astream(
            input={"messages": [{"role": "user", "content": "Use your visit_url tool to visit the website https://httpbin.org. Then visit https://whati.me/. After that, use your tool to go back a page."}]},
            stream_mode="debug",
            print_mode="debug"
        ):
            print(event)
async def main() -> None:
    """Start Playwright, run the agent once, then keep the browser open.

    Playwright is entered as an async context manager so that the driver is
    stopped when this coroutine exits for any reason (normal completion, an
    exception from the agent, or cancellation such as Ctrl-C), instead of
    being left running after a bare ``start()``.
    """
    async with async_playwright() as playwright:
        agent = await PlaywrightAgent.create_playwright_agent(playwright=playwright)
        await agent.run()
        # Keep the process (and therefore the browser window) alive for up to
        # an hour so the final page state can be inspected manually.
        await asyncio.sleep(3600)
# Run the agent only when executed as a script, so importing this module
# does not launch a browser as a side effect.
if __name__ == "__main__":
    asyncio.run(main())