Browser CDP API

The Browser CDP (Chrome DevTools Protocol) API provides programmatic control over the built-in browser for automation, testing, and web scraping tasks.

Overview

AIO Sandbox includes a full browser with CDP access, allowing you to:

  • Control browser navigation and interactions
  • Capture screenshots and PDFs
  • Execute JavaScript in browser context
  • Monitor network requests and responses
  • Access page content and manipulate the DOM

Getting CDP URL

To use the browser, first get the WebSocket debugger URL:

Endpoint: GET /cdp/json/version

Response:

{
  "Browser": "Chrome/120.0.0.0",
  "Protocol-Version": "1.3",
  "User-Agent": "Mozilla/5.0 ...",
  "V8-Version": "12.0.267.17",
  "WebKit-Version": "537.36",
  "webSocketDebuggerUrl": "ws://localhost:8080/cdp/ws/12345"
}

The webSocketDebuggerUrl is your CDP connection endpoint.

Browser Use Integration

Here's a complete example using the browser-use Python library:

import asyncio
import requests
from browser_use.browser.browser import BrowserSession, BrowserProfile

class AIOSandboxBrowser:
    """Drive the AIO Sandbox browser through a browser-use ``BrowserSession``.

    The session is created lazily on first use by attaching to the CDP
    WebSocket URL that the sandbox reports at ``/cdp/json/version``.
    """

    def __init__(self, server_url="http://localhost:8080"):
        """Store the sandbox base URL and the browser profile settings.

        Args:
            server_url: Base HTTP URL of the AIO Sandbox server.
        """
        self.server_url = server_url
        self.browser_session = None

        # Browser configuration, expanded into BrowserProfile(**self.profile)
        self.profile = {
            "extra_http_headers": {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
            },
            "ignore_https_errors": True,
            "java_script_enabled": True,
            "viewport": {"width": 1920, "height": 1080},
        }

    def _endpoint(self, path):
        """Join ``path`` onto the server URL without doubling the slash."""
        return f"{self.server_url.rstrip('/')}/{path.lstrip('/')}"

    def get_cdp_url(self):
        """Get CDP WebSocket URL from AIO Sandbox.

        This is a blocking HTTP call (10 second timeout).

        Returns:
            The ``webSocketDebuggerUrl`` value from the sandbox response.

        Raises:
            requests.HTTPError: If the sandbox answers with an error status.
            RuntimeError: If the response has no ``webSocketDebuggerUrl``.
        """
        response = requests.get(self._endpoint("/cdp/json/version"), timeout=10)
        response.raise_for_status()

        cdp_url = response.json().get("webSocketDebuggerUrl")
        if not cdp_url:
            raise RuntimeError("Could not get CDP URL from sandbox")

        print(f"CDP URL: {cdp_url}")
        return cdp_url

    async def initialize(self):
        """Initialize the browser session on first call and return it.

        Later calls return the cached session unchanged.
        """
        if self.browser_session is None:
            session = BrowserSession(
                browser_profile=BrowserProfile(**self.profile),
                cdp_url=self.get_cdp_url(),
            )
            await session.start()
            # Cache only after a successful start, so a failed start can be
            # retried instead of leaving a half-initialized session behind.
            self.browser_session = session
            print("Browser session initialized")

        return self.browser_session

    async def navigate_and_screenshot(self, url, screenshot_path="screenshot.png"):
        """Navigate to URL and take a full-page screenshot.

        Args:
            url: Address to open.
            screenshot_path: File path the PNG is written to.

        Returns:
            A dict with ``url``, ``title``, ``screenshot`` (the path) and
            ``content_length`` (length of the page HTML).
        """
        await self.initialize()

        page = await self.browser_session.browser_context.new_page()

        # Everything after page creation runs under try/finally so the page
        # is closed even if viewport/header setup fails.
        try:
            # Reuse the profile viewport instead of repeating the constant
            await page.set_viewport_size(
                self.profile.get("viewport", {"width": 1920, "height": 1080})
            )
            await page.set_extra_http_headers({
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.9',
            })

            await page.goto(url, wait_until="networkidle")

            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(2000)  # Additional wait for rendering

            await page.screenshot(path=screenshot_path, full_page=True)

            content = await page.content()
            title = await page.title()

            return {
                "url": url,
                "title": title,
                "screenshot": screenshot_path,
                "content_length": len(content)
            }

        finally:
            await page.close()

    async def extract_page_data(self, url, selectors=None):
        """Extract specific data from page using CSS selectors.

        Args:
            url: Address to open.
            selectors: Optional mapping of result key -> CSS selector. Only
                the first element matching each selector is read.

        Returns:
            A dict with ``url`` and ``title`` plus one entry per selector:
            the element's text content, ``None`` when nothing matched, or an
            ``"Error: ..."`` string when the lookup raised.
        """
        await self.initialize()

        page = await self.browser_session.browser_context.new_page()

        try:
            await page.goto(url, wait_until="networkidle")

            data = {"url": url, "title": await page.title()}

            for name, selector in (selectors or {}).items():
                # Best effort per selector: one bad selector must not
                # discard the values already collected.
                try:
                    element = await page.query_selector(selector)
                    data[name] = await element.text_content() if element else None
                except Exception as e:
                    data[name] = f"Error: {e}"

            return data

        finally:
            await page.close()

    async def close(self):
        """Close browser session; safe to call when none is open."""
        # Clear the reference first so the object is reusable even if the
        # underlying close() raises.
        session, self.browser_session = self.browser_session, None
        if session:
            await session.close()

# Usage example
async def main():
    """Screenshot example.com, then extract a few elements from it."""
    browser = AIOSandboxBrowser()

    try:
        # Take screenshot of a website
        result = await browser.navigate_and_screenshot("https://example.com")
        print(f"Screenshot saved: {result}")

        # Extract specific data (each selector yields its first match only)
        selectors = {
            "heading": "h1",
            "description": "p",
            "links": "a"
        }

        data = await browser.extract_page_data("https://example.com", selectors)
        print(f"Extracted data: {data}")

    finally:
        # Always release the browser session, even if a step above failed
        await browser.close()


# Run the example only when executed as a script, not when imported
if __name__ == "__main__":
    asyncio.run(main())

Playwright Integration

For more advanced browser automation, connect Playwright to the sandbox browser over CDP:

import asyncio
import requests
from playwright.async_api import async_playwright

class AIOSandboxPlaywright:
    """Attach Playwright to the AIO Sandbox browser over CDP.

    The Playwright driver and the CDP connection are created lazily by
    ``connect()`` and released by ``close()``; after ``close()`` the object
    can connect again.
    """

    def __init__(self, server_url="http://localhost:8080"):
        """Store the sandbox base URL.

        Args:
            server_url: Base HTTP URL of the AIO Sandbox server.
        """
        self.server_url = server_url
        self.playwright = None
        self.browser = None

    def get_cdp_url(self):
        """Get CDP endpoint from AIO Sandbox.

        This is a blocking HTTP call (10 second timeout).

        Returns:
            The ``webSocketDebuggerUrl`` value from the sandbox response.

        Raises:
            requests.HTTPError: If the sandbox answers with an error status.
            RuntimeError: If the response has no ``webSocketDebuggerUrl``.
        """
        # A timeout keeps an unreachable sandbox from hanging the caller
        response = requests.get(
            f"{self.server_url.rstrip('/')}/cdp/json/version", timeout=10
        )
        response.raise_for_status()

        cdp_url = response.json().get("webSocketDebuggerUrl")
        if not cdp_url:
            # Fail here with a clear message rather than handing None to
            # connect_over_cdp.
            raise RuntimeError("Could not get CDP URL from sandbox")
        return cdp_url

    async def connect(self):
        """Connect to AIO Sandbox browser via CDP and return the browser.

        The connection is cached. It is keyed on ``self.browser`` so that a
        failed attempt (driver started, connection not established) is
        retried on the next call instead of returning ``None``.
        """
        if self.browser is None:
            # Resolve the endpoint first so an unreachable sandbox does not
            # start a Playwright driver for nothing.
            cdp_url = self.get_cdp_url()

            if self.playwright is None:
                self.playwright = await async_playwright().start()

            # Connect to existing browser
            self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)

        return self.browser

    async def automate_form_fill(self, url, form_data,
                                 submit_selector="input[type='submit']"):
        """Example: Fill and submit a form.

        Args:
            url: Page containing the form.
            form_data: Mapping of CSS selector -> value to type into it.
            submit_selector: CSS selector of the element clicked to submit.

        Returns:
            ``{"success": True, "final_url": ..., "title": ...}`` on success,
            or ``{"success": False, "error": ...}`` if any page step raised.
        """
        browser = await self.connect()
        context = await browser.new_context()

        # Outer try/finally guarantees the context is closed even when
        # new_page() itself fails.
        try:
            page = await context.new_page()

            try:
                await page.goto(url)

                for selector, value in form_data.items():
                    await page.fill(selector, value)

                await page.click(submit_selector)

                # Wait for the post-submit navigation to settle
                await page.wait_for_load_state("networkidle")

                return {
                    "success": True,
                    "final_url": page.url,
                    "title": await page.title()
                }

            except Exception as e:
                # Deliberate best effort: report the failure as data
                return {
                    "success": False,
                    "error": str(e)
                }

        finally:
            await context.close()

    async def monitor_network(self, url, duration=30):
        """Monitor network requests for specified duration.

        Args:
            url: Page to open.
            duration: Seconds to keep listening after navigation.

        Returns:
            A dict with ``url``, ``monitoring_duration``, ``total_requests``
            and ``requests`` (one dict per request seen).
        """
        browser = await self.connect()
        context = await browser.new_context()

        requests_log = []
        loop = asyncio.get_running_loop()

        def record(request):
            requests_log.append({
                "url": request.url,
                "method": request.method,
                "headers": dict(request.headers),
                # Event-loop clock: monotonic, not wall-clock time
                "timestamp": loop.time()
            })

        try:
            page = await context.new_page()

            # Listen to network events
            page.on("request", record)

            await page.goto(url)
            await asyncio.sleep(duration)  # Monitor for specified duration

            return {
                "url": url,
                "monitoring_duration": duration,
                "total_requests": len(requests_log),
                "requests": requests_log
            }

        finally:
            await context.close()

    async def close(self):
        """Close connections and reset state so ``connect()`` can run again."""
        browser, self.browser = self.browser, None
        playwright, self.playwright = self.playwright, None

        try:
            if browser:
                await browser.close()
        finally:
            # Stop the driver even if closing the browser raised
            if playwright:
                await playwright.stop()

# Usage example
async def demo():
    """Run the form-fill example, then the network-monitoring example."""
    automation = AIOSandboxPlaywright()

    try:
        # Example: Fill a search form (selector -> value to type)
        form_data = {
            "input[name='q']": "AIO Sandbox browser automation",
            "input[name='search']": ""
        }

        result = await automation.automate_form_fill("https://example.com/search", form_data)
        print(f"Form automation result: {result}")

        # Example: Monitor network traffic
        network_data = await automation.monitor_network("https://example.com", duration=10)
        print(f"Network monitoring: {network_data['total_requests']} requests captured")

    finally:
        # Always release the CDP connection and the Playwright driver
        await automation.close()


# Run the example only when executed as a script, not when imported
if __name__ == "__main__":
    asyncio.run(demo())

VNC Visual Access

For visual browser interaction, use the VNC interface:

VNC URL: http://localhost:8080/vnc/index.html?autoconnect=true

Features:

  • Full desktop environment
  • Visual browser interaction
  • Keyboard and mouse input
  • Real-time screen sharing
  • Screenshot capture

JavaScript Browser Control

For web-based automation, use the CDP directly:

/**
 * Minimal CDP client for the AIO Sandbox browser over a raw WebSocket.
 *
 * NOTE(review): the socket opened here is the `webSocketDebuggerUrl` from
 * /cdp/json/version. Confirm that this endpoint accepts Page.* commands in
 * the sandbox; if it does not, a page-level target is needed.
 */
class AIOSandboxCDP {
    /**
     * @param {string} baseUrl Base HTTP URL of the AIO Sandbox server.
     */
    constructor(baseUrl = 'http://localhost:8080') {
        this.baseUrl = baseUrl;
        this.cdpWs = null;
        // Last command id issued on this connection
        this.lastCommandId = 0;
    }

    /**
     * Look up the CDP WebSocket URL and open the socket.
     *
     * @returns {Promise<void>} Resolves once the socket is open; rejects if
     *     the lookup fails or the socket reports an error.
     */
    async connect() {
        // Get CDP URL
        const response = await fetch(`${this.baseUrl}/cdp/json/version`);
        if (!response.ok) {
            throw new Error(`Could not get CDP URL from sandbox: HTTP ${response.status}`);
        }
        const data = await response.json();
        const wsUrl = data.webSocketDebuggerUrl;
        if (!wsUrl) {
            throw new Error('Could not get CDP URL from sandbox');
        }

        // Connect to CDP WebSocket
        const ws = new WebSocket(wsUrl);
        this.cdpWs = ws;

        return new Promise((resolve, reject) => {
            ws.onopen = () => {
                console.log('Connected to AIO Sandbox CDP');
                resolve();
            };
            // Without this a refused or failed connection would leave the
            // promise pending forever.
            ws.onerror = () => {
                reject(new Error(`Could not connect to CDP WebSocket at ${wsUrl}`));
            };
        });
    }

    /**
     * Send one CDP command and wait for its matching response.
     *
     * @param {string} method CDP method name, e.g. 'Page.navigate'.
     * @param {object} params Method parameters.
     * @returns {Promise<object>} The `result` field of the response; rejects
     *     with the response's error message if the command failed.
     */
    async sendCommand(method, params = {}) {
        if (!this.cdpWs) {
            throw new Error('Not connected: call connect() first');
        }

        // A per-connection counter keeps ids unique; Date.now() repeats for
        // commands issued within the same millisecond.
        this.lastCommandId += 1;
        const message = {
            id: this.lastCommandId,
            method,
            params
        };

        return new Promise((resolve, reject) => {
            const messageHandler = (event) => {
                const response = JSON.parse(event.data);
                if (response.id !== message.id) {
                    return; // an event, or the reply to another command
                }
                this.cdpWs.removeEventListener('message', messageHandler);
                if (response.error) {
                    reject(new Error(response.error.message));
                } else {
                    resolve(response.result);
                }
            };

            this.cdpWs.addEventListener('message', messageHandler);
            this.cdpWs.send(JSON.stringify(message));
        });
    }

    /**
     * Navigate and wait for the page's load event.
     *
     * @param {string} url Address to open.
     * @returns {Promise<object>} The result of the Page.navigate command.
     */
    async navigateToUrl(url) {
        await this.sendCommand('Page.enable');
        const result = await this.sendCommand('Page.navigate', { url });

        // Wait for page load
        return new Promise((resolve) => {
            const loadHandler = (event) => {
                const message = JSON.parse(event.data);
                if (message.method === 'Page.loadEventFired') {
                    this.cdpWs.removeEventListener('message', loadHandler);
                    resolve(result);
                }
            };
            this.cdpWs.addEventListener('message', loadHandler);
        });
    }

    /**
     * Capture the current page as a PNG.
     *
     * @returns {Promise<string>} Base64 encoded image data.
     */
    async takeScreenshot() {
        // `quality` is omitted: CDP applies it to JPEG only
        const result = await this.sendCommand('Page.captureScreenshot', {
            format: 'png'
        });
        return result.data; // Base64 encoded image
    }

    /**
     * Close the CDP WebSocket, if one is open.
     */
    close() {
        if (this.cdpWs) {
            this.cdpWs.close();
            this.cdpWs = null;
        }
    }
}

// Usage (top-level await: run this inside an ES module or an async function)
const cdp = new AIOSandboxCDP();
await cdp.connect();
try {
    await cdp.navigateToUrl('https://example.com');
    const screenshot = await cdp.takeScreenshot();
    // takeScreenshot() returns a base64 string, so .length counts characters
    console.log('Screenshot captured:', screenshot.length, 'base64 characters');
} finally {
    // Release the WebSocket even if navigation or capture failed
    cdp.cdpWs.close();
}

Best Practices

  1. Resource Management: Always close pages and contexts when done
  2. Error Handling: Wrap CDP operations in try/except (Python) or try/catch (JavaScript) blocks
  3. Wait Strategies: Use appropriate wait conditions for dynamic content
  4. Screenshot Timing: Allow sufficient time for page rendering
  5. Network Monitoring: Set reasonable monitoring durations

Common Use Cases

  • Web Scraping: Extract data from dynamic websites
  • UI Testing: Automated testing of web applications
  • Screenshot Generation: Capture page visuals for reports
  • Form Automation: Fill and submit forms programmatically
  • Performance Monitoring: Track network requests and timing

Next Steps

  • Terminal Integration: Combine with shell commands → Shell API
  • File Operations: Save extracted data → File API
  • Practical Examples: See browser automation in action → Browser Examples