Browser CDP API

The Browser CDP (Chrome DevTools Protocol) API provides programmatic control over the built-in browser for automation, testing, and web scraping tasks.

Overview

AIO Sandbox includes a full browser with CDP access, allowing you to:

Control browser navigation and interactions
Capture screenshots and PDFs
Execute JavaScript in browser context
Monitor network requests and responses
Access page content and manipulate the DOM

Getting CDP URL

To use the browser, first get the WebSocket debugger URL:

Endpoint: GET /cdp/json/version

Response:

{
  "Browser": "Chrome/120.0.0.0",
  "Protocol-Version": "1.3",
  "User-Agent": "Mozilla/5.0 ...",
  "V8-Version": "12.0.267.17",
  "WebKit-Version": "537.36",
  "webSocketDebuggerUrl": "ws://localhost:8080/cdp/ws/12345"
}

The webSocketDebuggerUrl is your CDP connection endpoint.

Browser Use Integration

Here's a complete example using the browser-use Python library:

import asyncio
import requests
from browser_use.browser.browser import BrowserSession, BrowserProfile

class AIOSandboxBrowser:
    """Drive the AIO Sandbox browser through browser-use over CDP.

    The WebSocket debugger URL is fetched from the sandbox's
    ``/cdp/json/version`` endpoint and passed to a ``BrowserSession``.
    The session is created lazily on first use and reused afterwards.
    """

    def __init__(self, server_url: str = "http://localhost:8080"):
        """Store the sandbox base URL and the browser profile settings.

        Args:
            server_url: Base URL of the AIO Sandbox server. Trailing slashes
                are stripped so endpoint paths can be appended without
                producing a double slash.
        """
        self.server_url = server_url.rstrip("/")
        self.browser_session = None

        # Browser configuration; expanded into BrowserProfile(**self.profile)
        self.profile = {
            "extra_http_headers": {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
            },
            "ignore_https_errors": True,
            "java_script_enabled": True,
            "viewport": {"width": 1920, "height": 1080},
        }

    def get_cdp_url(self) -> str:
        """Get the CDP WebSocket URL from AIO Sandbox.

        Returns:
            The ``webSocketDebuggerUrl`` value from ``/cdp/json/version``.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            RuntimeError: If the response carries no ``webSocketDebuggerUrl``.
        """
        response = requests.get(f"{self.server_url}/cdp/json/version", timeout=10)
        response.raise_for_status()

        cdp_url = response.json().get("webSocketDebuggerUrl")
        if not cdp_url:
            raise RuntimeError("Could not get CDP URL from sandbox")

        print(f"CDP URL: {cdp_url}")
        return cdp_url

    async def initialize(self):
        """Create and start the browser session on first use.

        Returns:
            The started ``BrowserSession`` (the same object on later calls).
        """
        if self.browser_session is None:
            cdp_url = self.get_cdp_url()

            session = BrowserSession(
                browser_profile=BrowserProfile(**self.profile),
                cdp_url=cdp_url,
            )
            await session.start()
            # Publish the session only after start() succeeded; otherwise a
            # failed start would leave a dead session that later calls reuse.
            self.browser_session = session
            print("Browser session initialized")

        return self.browser_session

    async def navigate_and_screenshot(self, url: str, screenshot_path: str = "screenshot.png") -> dict:
        """Navigate to ``url`` and save a full-page screenshot.

        Args:
            url: Address to open in a new page.
            screenshot_path: File path the screenshot is written to.

        Returns:
            A dict with keys ``url``, ``title``, ``screenshot`` and
            ``content_length`` (length of the page's HTML content).
        """
        await self.initialize()

        # Create new page
        page = await self.browser_session.browser_context.new_page()

        try:
            # Set viewport and headers. Done inside the try block so the page
            # is still closed if either call fails.
            await page.set_viewport_size({"width": 1920, "height": 1080})
            await page.set_extra_http_headers({
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.9',
            })

            # Navigate to URL
            await page.goto(url, wait_until="networkidle")

            # Wait for page to be fully loaded
            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(2000)  # Additional wait for rendering

            # Take screenshot
            await page.screenshot(path=screenshot_path, full_page=True)

            # Get page content
            content = await page.content()
            title = await page.title()

            return {
                "url": url,
                "title": title,
                "screenshot": screenshot_path,
                "content_length": len(content),
            }

        finally:
            await page.close()

    async def extract_page_data(self, url: str, selectors=None) -> dict:
        """Extract text from a page using CSS selectors.

        Args:
            url: Address to open in a new page.
            selectors: Optional mapping of result key -> CSS selector.

        Returns:
            A dict with ``url`` and ``title`` plus one entry per selector:
            the text content of the element returned by ``query_selector``,
            ``None`` when nothing matched, or ``"Error: ..."`` when the
            lookup raised.
        """
        await self.initialize()

        page = await self.browser_session.browser_context.new_page()

        try:
            await page.goto(url, wait_until="networkidle")

            data = {"url": url, "title": await page.title()}

            for name, selector in (selectors or {}).items():
                # Best effort per selector: one failing lookup is recorded in
                # the result instead of aborting the whole extraction.
                try:
                    element = await page.query_selector(selector)
                    data[name] = await element.text_content() if element else None
                except Exception as e:
                    data[name] = f"Error: {e}"

            return data

        finally:
            await page.close()

    async def close(self) -> None:
        """Close the browser session, if one was started."""
        # Detach first so the reference is cleared even when close() raises.
        session, self.browser_session = self.browser_session, None
        if session:
            await session.close()

# Usage example
async def main():
    """Screenshot example.com, then pull a few elements' text from it.

    The browser wrapper is always closed, even if a step fails.
    """
    sandbox = AIOSandboxBrowser()
    target = "https://example.com"

    # Result key -> CSS selector to look up on the page
    wanted = dict(heading="h1", description="p", links="a")

    try:
        # Step 1: capture a screenshot of the site
        shot_info = await sandbox.navigate_and_screenshot(target)
        print(f"Screenshot saved: {shot_info}")

        # Step 2: extract the selected elements
        extracted = await sandbox.extract_page_data(target, wanted)
        print(f"Extracted data: {extracted}")
    finally:
        await sandbox.close()

# Run the example only when executed as a script, so importing this module
# does not start a browser automation run as a side effect.
if __name__ == "__main__":
    asyncio.run(main())

Playwright Integration

For advanced browser automation with Playwright:

import asyncio
import requests
from playwright.async_api import async_playwright

class AIOSandboxPlaywright:
    """Automate the AIO Sandbox browser with Playwright over CDP.

    Playwright is started and attached to the sandbox browser lazily on the
    first call to ``connect()``; the connection is reused afterwards.
    """

    def __init__(self, server_url: str = "http://localhost:8080"):
        """Store the sandbox base URL.

        Args:
            server_url: Base URL of the AIO Sandbox server. Trailing slashes
                are stripped so endpoint paths can be appended without
                producing a double slash.
        """
        self.server_url = server_url.rstrip("/")
        self.playwright = None
        self.browser = None

    def get_cdp_url(self) -> str:
        """Get the CDP endpoint from AIO Sandbox.

        Returns:
            The ``webSocketDebuggerUrl`` value from ``/cdp/json/version``.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            RuntimeError: If the response carries no ``webSocketDebuggerUrl``.
        """
        # A timeout keeps an unreachable sandbox from blocking forever.
        response = requests.get(f"{self.server_url}/cdp/json/version", timeout=10)
        response.raise_for_status()

        cdp_url = response.json().get("webSocketDebuggerUrl")
        if not cdp_url:
            # Fail here with a clear message instead of passing None on to
            # connect_over_cdp().
            raise RuntimeError("Could not get CDP URL from sandbox")
        return cdp_url

    async def connect(self):
        """Connect to the AIO Sandbox browser via CDP.

        Returns:
            The connected Playwright browser (the same object on later calls).
        """
        # Guard on the browser, not on Playwright: if an earlier attempt
        # started Playwright but failed to connect, the next call retries
        # instead of returning None.
        if self.browser is None:
            if self.playwright is None:
                self.playwright = await async_playwright().start()

            cdp_url = self.get_cdp_url()

            # Connect to existing browser
            self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)

        return self.browser

    async def automate_form_fill(self, url: str, form_data: dict) -> dict:
        """Example: fill and submit a form.

        Args:
            url: Page containing the form.
            form_data: Mapping of CSS selector -> value to fill in.

        Returns:
            ``{"success": True, "final_url": ..., "title": ...}`` on success,
            or ``{"success": False, "error": ...}`` if any step after the
            context was created raised.
        """
        browser = await self.connect()
        context = await browser.new_context()

        try:
            # Created inside the try block so the context is closed even if
            # opening the page fails.
            page = await context.new_page()
            await page.goto(url)

            # Fill form fields
            for selector, value in form_data.items():
                await page.fill(selector, value)

            # Submit form (example)
            await page.click("input[type='submit']")

            # Wait for navigation
            await page.wait_for_load_state("networkidle")

            return {
                "success": True,
                "final_url": page.url,
                "title": await page.title(),
            }

        except Exception as e:
            # Failures are reported in the result rather than raised.
            return {
                "success": False,
                "error": str(e),
            }

        finally:
            await context.close()

    async def monitor_network(self, url: str, duration=30) -> dict:
        """Record the requests a page issues for ``duration`` seconds.

        Args:
            url: Page to open.
            duration: Seconds to keep listening after navigation.

        Returns:
            A dict with ``url``, ``monitoring_duration``, ``total_requests``
            and ``requests`` (one entry per request with its URL, method,
            headers and event-loop timestamp).
        """
        browser = await self.connect()
        context = await browser.new_context()

        requests_log = []
        # Resolve the loop once; get_running_loop() is the supported way to
        # obtain it from inside a coroutine.
        loop = asyncio.get_running_loop()

        def record_request(request):
            requests_log.append({
                "url": request.url,
                "method": request.method,
                "headers": dict(request.headers),
                "timestamp": loop.time(),
            })

        try:
            page = await context.new_page()

            # Listen to network events
            page.on("request", record_request)

            await page.goto(url)
            await asyncio.sleep(duration)  # Monitor for specified duration

            return {
                "url": url,
                "monitoring_duration": duration,
                "total_requests": len(requests_log),
                "requests": requests_log,
            }

        finally:
            await context.close()

    async def close(self) -> None:
        """Close the browser connection and stop Playwright."""
        # Detach both references first so the object can be reconnected later
        # and is never left pointing at closed handles.
        browser, self.browser = self.browser, None
        playwright, self.playwright = self.playwright, None
        try:
            if browser:
                await browser.close()
        finally:
            # Stop Playwright even if closing the browser raised.
            if playwright:
                await playwright.stop()

# Usage example
async def demo():
    """Run the form-fill example and then a short network capture.

    The automation wrapper is always closed, even if a step fails.
    """
    runner = AIOSandboxPlaywright()

    # CSS selector -> value typed into the matching field
    search_fields = {
        "input[name='q']": "AIO Sandbox browser automation",
        "input[name='search']": "",
    }

    try:
        # Example: Fill a search form
        outcome = await runner.automate_form_fill(
            "https://example.com/search", search_fields
        )
        print(f"Form automation result: {outcome}")

        # Example: Monitor network traffic
        capture = await runner.monitor_network("https://example.com", duration=10)
        captured_count = capture['total_requests']
        print(f"Network monitoring: {captured_count} requests captured")
    finally:
        await runner.close()

# Run the demo only when executed as a script, so importing this module
# does not start a browser automation run as a side effect.
if __name__ == "__main__":
    asyncio.run(demo())

VNC Visual Access

For visual browser interaction, use the VNC interface:

VNC URL: http://localhost:8080/vnc/index.html?autoconnect=true

Features:

Full desktop environment
Visual browser interaction
Keyboard and mouse input
Real-time screen sharing
Screenshot capture

JavaScript Browser Control

For web-based automation, connect to CDP directly over its WebSocket endpoint:

/**
 * Minimal Chrome DevTools Protocol client for the AIO Sandbox browser.
 *
 * Fetches the WebSocket debugger URL from `/cdp/json/version`, opens the
 * socket, and exchanges JSON commands matched to their responses by `id`.
 */
class AIOSandboxCDP {
    /**
     * @param {string} [baseUrl] Base URL of the AIO Sandbox server. Trailing
     *   slashes are stripped so endpoint paths can be appended safely.
     */
    constructor(baseUrl = 'http://localhost:8080') {
        this.baseUrl = baseUrl.replace(/\/+$/, '');
        this.cdpWs = null;
        // Per-connection command counter. Date.now() can repeat for commands
        // issued within the same millisecond, which would cross responses.
        this.nextId = 0;
    }

    /**
     * Look up the CDP WebSocket URL and open the connection.
     *
     * @returns {Promise<void>} Resolves once the socket is open; rejects if
     *   the version endpoint fails, has no URL, or the socket errors.
     */
    async connect() {
        // Get CDP URL
        const response = await fetch(`${this.baseUrl}/cdp/json/version`);
        if (!response.ok) {
            throw new Error(`Failed to fetch CDP version info: HTTP ${response.status}`);
        }
        const data = await response.json();
        const wsUrl = data.webSocketDebuggerUrl;
        if (!wsUrl) {
            throw new Error('Could not get CDP URL from sandbox');
        }

        // Connect to CDP WebSocket
        const ws = new WebSocket(wsUrl);
        this.cdpWs = ws;

        return new Promise((resolve, reject) => {
            ws.onopen = () => {
                console.log('Connected to AIO Sandbox CDP');
                resolve();
            };
            // Without this, a refused or failed connection would leave the
            // promise pending forever.
            ws.onerror = () => {
                reject(new Error(`Failed to connect to CDP WebSocket at ${wsUrl}`));
            };
        });
    }

    /**
     * Send one CDP command and wait for its response.
     *
     * @param {string} method CDP method name, e.g. `Page.navigate`.
     * @param {object} [params] Parameters for the method.
     * @returns {Promise<object>} The `result` field of the matching response.
     *   Rejects with the response's error message, or if the socket is not
     *   open or closes before a response arrives.
     */
    async sendCommand(method, params = {}) {
        const ws = this.cdpWs;
        if (!ws || ws.readyState !== WebSocket.OPEN) {
            throw new Error(`Cannot send ${method}: CDP WebSocket is not open`);
        }

        this.nextId += 1;
        const message = {
            id: this.nextId,
            method,
            params
        };

        return new Promise((resolve, reject) => {
            const cleanup = () => {
                ws.removeEventListener('message', messageHandler);
                ws.removeEventListener('close', closeHandler);
            };

            const messageHandler = (event) => {
                const response = JSON.parse(event.data);
                // Events and other commands' responses carry a different
                // (or no) id and are ignored here.
                if (response.id === message.id) {
                    cleanup();
                    if (response.error) {
                        reject(new Error(response.error.message));
                    } else {
                        resolve(response.result);
                    }
                }
            };

            // Settle the promise if the connection drops while waiting.
            const closeHandler = () => {
                cleanup();
                reject(new Error(`CDP connection closed before a response to ${method}`));
            };

            ws.addEventListener('message', messageHandler);
            ws.addEventListener('close', closeHandler);
            ws.send(JSON.stringify(message));
        });
    }

    /**
     * Navigate to `url` and wait for the page's load event.
     *
     * @param {string} url Address to open.
     * @returns {Promise<object>} The result of `Page.navigate`, resolved
     *   after `Page.loadEventFired` is received. Rejects if the navigate
     *   command fails or reports `errorText`.
     */
    async navigateToUrl(url) {
        await this.sendCommand('Page.enable');

        const ws = this.cdpWs;
        let loadHandler;

        // Start listening before sending Page.navigate, so the load event
        // cannot be missed however the runtime batches message delivery.
        const loaded = new Promise((resolve) => {
            loadHandler = (event) => {
                const message = JSON.parse(event.data);
                if (message.method === 'Page.loadEventFired') {
                    ws.removeEventListener('message', loadHandler);
                    resolve();
                }
            };
            ws.addEventListener('message', loadHandler);
        });

        let result;
        try {
            result = await this.sendCommand('Page.navigate', { url });
            // Page.navigate reports navigation failures through errorText
            // instead of a protocol error; waiting for a load event then
            // would never finish.
            if (result && result.errorText) {
                throw new Error(`Navigation to ${url} failed: ${result.errorText}`);
            }
        } catch (err) {
            ws.removeEventListener('message', loadHandler);
            throw err;
        }

        // Wait for page load
        await loaded;
        return result;
    }

    /**
     * Capture a PNG screenshot of the current page.
     *
     * @returns {Promise<string>} Base64 encoded image data.
     */
    async takeScreenshot() {
        // PNG is lossless; CDP's `quality` option applies to JPEG only,
        // so it is not sent.
        const result = await this.sendCommand('Page.captureScreenshot', {
            format: 'png'
        });
        return result.data; // Base64 encoded image
    }
}

// Usage
// NOTE: top-level await needs an ES module (or wrap these lines in an async function).
const cdp = new AIOSandboxCDP();
await cdp.connect();
await cdp.navigateToUrl('https://example.com');
const screenshot = await cdp.takeScreenshot();
// takeScreenshot() returns a Base64 string, so .length counts characters, not bytes.
console.log('Screenshot captured:', screenshot.length, 'base64 characters');

Best Practices

Resource Management: Always close pages and contexts when done
Error Handling: Wrap CDP operations in try/except (Python) or try/catch (JavaScript) blocks
Wait Strategies: Use appropriate wait conditions for dynamic content
Screenshot Timing: Allow sufficient time for page rendering
Network Monitoring: Set reasonable monitoring durations

Common Use Cases

Web Scraping: Extract data from dynamic websites
UI Testing: Automated testing of web applications
Screenshot Generation: Capture page visuals for reports
Form Automation: Fill and submit forms programmatically
Performance Monitoring: Track network requests and timing

Next Steps

Terminal Integration: Combine with shell commands → Shell API
File Operations: Save extracted data → File API
Practical Examples: See browser automation in action → Browser Examples

ON THIS PAGE

Browser CDP API#

Overview#

Getting CDP URL#

Browser Use Integration#

Playwright Integration#

VNC Visual Access#

JavaScript Browser Control#

Best Practices#

Common Use Cases#

Next Steps#