Scaling Codes Logo
Scaling Codes

Web Browser MCP

Allow AI models to browse the web, extract information, and interact with web services

Web Integration · Advanced · MCP Protocol

Overview

The Web Browser MCP enables AI models to safely browse the web, extract information from websites, and interact with web services while maintaining security controls and preventing malicious activities.

This protocol is essential for AI models that need real-time information, web scraping capabilities, or the ability to interact with web-based applications and services.

Implementation Guide

1. Web Browser MCP Server

import { Server } from '@modelcontextprotocol/sdk/server'
import { WebBrowserTool } from './tools/WebBrowserTool'
import puppeteer from 'puppeteer'

// Settings handed to the browser tool: domain allow/block lists plus
// page-size, timeout and headless options.
const browserToolOptions = {
  allowedDomains: ['example.com', 'api.github.com', 'news.ycombinator.com'],
  blockedDomains: ['malicious-site.com', 'phishing-site.net'],
  maxPageSize: 5 * 1024 * 1024, // 5MB
  timeout: 30000, // 30 seconds
  headless: true
}

// Create the MCP server that will host the tool
const server = new Server({ name: 'web-browser-mcp', version: '1.0.0' })

// Register web browser tool
const webBrowserTool = new WebBrowserTool(browserToolOptions)
server.tool(webBrowserTool)

// Start listening on localhost:3000
server.listen({ port: 3000, host: 'localhost' })

2. Web Browser Tool Implementation

import { Tool } from '@modelcontextprotocol/sdk/server'
import puppeteer, { Browser, Page } from 'puppeteer'

/**
 * Settings accepted by the web browser tool.
 */
interface WebBrowserConfig {
  // --- Domain policy -------------------------------------------------------

  /** Domains the tool is permitted to open. */
  allowedDomains: string[]
  /** Domains that are always rejected; checked before `allowedDomains`. */
  blockedDomains: string[]

  // --- Resource limits -----------------------------------------------------

  /** Upper bound, in bytes, on the HTML size accepted by the `navigate` action. */
  maxPageSize: number
  /** Value passed to `page.setDefaultTimeout` for every page that is opened. */
  timeout: number

  // --- Browser launch ------------------------------------------------------

  /** Forwarded as the `headless` option of `puppeteer.launch`. */
  headless: boolean
}

/**
 * MCP tool that opens web pages in a Puppeteer-controlled browser and returns
 * page metadata, extracted text, screenshots, or the outcome of a click.
 *
 * A single browser instance is launched lazily on the first `execute` call and
 * reused afterwards; call `cleanup` to shut it down. Every call opens its own
 * page and closes it again before returning.
 *
 * Note: `waitFor` is declared in the input schema but is not read by any action.
 */
export class WebBrowserTool extends Tool {
  private config: WebBrowserConfig
  private browser: Browser | null = null

  /**
   * @param config Domain policy, limits and launch options for the tool.
   */
  constructor(config: WebBrowserConfig) {
    super({
      name: 'web_browser',
      description: 'Browse web pages and extract information safely',
      inputSchema: {
        type: 'object',
        properties: {
          action: { type: 'string', enum: ['navigate', 'extract', 'screenshot', 'click'] },
          url: { type: 'string' },
          selector: { type: 'string' },
          waitFor: { type: 'string' }
        },
        required: ['action', 'url']
      }
    })
    this.config = config
  }

  /**
   * Runs one browser action against `input.url`.
   *
   * @param input Object with `action`, `url` and, depending on the action, `selector`.
   * @returns The action-specific result object.
   * @throws Error if the URL is rejected by the domain policy, if `click` is
   *   requested without a selector, or if the browser operation fails (the
   *   latter is reported as `Web browser operation failed: <reason>`).
   */
  async execute(input: any) {
    const url = input?.url

    // Validate URL security
    if (!this.isUrlAllowed(url)) {
      throw new Error(`URL ${url} not allowed`)
    }

    // Reject an unusable request before any browser resources are allocated.
    if (input.action === 'click' && (typeof input.selector !== 'string' || input.selector === '')) {
      throw new Error("Action 'click' requires a selector")
    }

    // Initialize browser if needed
    if (!this.browser) {
      this.browser = await puppeteer.launch({
        headless: this.config.headless,
        // NOTE(review): these flags disable Chromium's sandbox. That is risky for a
        // tool that loads untrusted pages; confirm the deployment really needs them.
        args: ['--no-sandbox', '--disable-setuid-sandbox']
      })
    }
    const browser = this.browser

    let page: Page | null = null
    try {
      page = await browser.newPage()
      page.setDefaultTimeout(this.config.timeout)

      switch (input.action) {
        case 'navigate':
          return await this.navigate(page, url)
        case 'extract':
          return await this.extractContent(page, url, input.selector)
        case 'screenshot':
          return await this.takeScreenshot(page, url)
        case 'click':
          return await this.clickElement(page, url, input.selector)
        default:
          throw new Error(`Unknown action: ${input.action}`)
      }
    } catch (error: unknown) {
      // Anything can be thrown, so narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(`Web browser operation failed: ${reason}`)
    } finally {
      // Always release the tab; otherwise every call leaks an open page.
      // A failure to close is ignored so it cannot mask the real outcome.
      if (page) {
        await page.close().catch(() => undefined)
      }
    }
  }

  /**
   * Decides whether `url` may be opened.
   *
   * Only `http:` and `https:` URLs are accepted. A host matches a configured
   * domain when it equals the domain or is a subdomain of it; plain substring
   * matching would let `example.com.evil.net` or `notexample.com` slip through
   * an allow-list entry for `example.com`. Blocked domains take precedence.
   *
   * @returns `true` if the URL parses, uses http(s), is not blocked and is allowed.
   */
  private isUrlAllowed(url: string): boolean {
    let host: string
    try {
      const parsed = new URL(url)
      if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
        return false
      }
      host = parsed.hostname.toLowerCase()
    } catch {
      return false
    }

    // "example.com." (fully-qualified form) names the same host as "example.com".
    if (host.endsWith('.')) {
      host = host.slice(0, -1)
    }

    const matchesHost = (domain: string): boolean => {
      const normalized = domain.toLowerCase()
      return host === normalized || host.endsWith(`.${normalized}`)
    }

    // Check if domain is blocked
    if (this.config.blockedDomains.some(matchesHost)) {
      return false
    }

    // Check if domain is allowed
    return this.config.allowedDomains.some(matchesHost)
  }

  /**
   * Loads `url` in `page` and re-applies the domain policy to the URL the page
   * ended up on, so a redirect cannot be used to reach a disallowed host.
   *
   * @throws Error if the final URL is not allowed.
   */
  private async openPage(page: Page, url: string): Promise<void> {
    await page.goto(url, { waitUntil: 'networkidle2' })

    const landedUrl = page.url()
    if (!this.isUrlAllowed(landedUrl)) {
      throw new Error(`Redirected to ${landedUrl}, which is not allowed`)
    }
  }

  /**
   * Loads the page and reports its title and HTML size.
   *
   * @throws Error if the HTML is larger than `config.maxPageSize` bytes.
   */
  private async navigate(page: Page, url: string) {
    await this.openPage(page, url)

    const title = await page.title()
    const content = await page.content()
    const contentLength = Buffer.byteLength(content, 'utf8')

    if (contentLength > this.config.maxPageSize) {
      throw new Error('Page size exceeds maximum allowed size')
    }

    return {
      title,
      url,
      contentLength,
      status: 'success'
    }
  }

  /**
   * Loads the page and returns trimmed text content.
   *
   * With a `selector`, the text of the first matching element is returned.
   * Without one, the text of `<main>`, else `<article>`, else `<body>` is used.
   */
  private async extractContent(page: Page, url: string, selector?: string) {
    await this.openPage(page, url)

    let content: string
    if (selector) {
      content = await page.$eval(selector, el => el.textContent || '')
    } else {
      content = await page.evaluate(() => {
        // Prefer the main content container over the whole body
        const main = document.querySelector('main') || document.querySelector('article') || document.body
        return main.innerText || main.textContent || ''
      })
    }

    return {
      url,
      content: content.trim(),
      extractedAt: new Date().toISOString()
    }
  }

  /**
   * Loads the page and returns a full-page PNG screenshot as a base64 string.
   */
  private async takeScreenshot(page: Page, url: string) {
    await this.openPage(page, url)

    // Ask Puppeteer for base64 directly. Calling `.toString('base64')` on the
    // binary result only works when that result is a Node Buffer, which is not
    // the case in every Puppeteer release.
    const screenshot = await page.screenshot({
      type: 'png',
      fullPage: true,
      encoding: 'base64'
    })

    return {
      url,
      screenshot,
      format: 'png',
      capturedAt: new Date().toISOString()
    }
  }

  /**
   * Loads the page, clicks the first element matching `selector`, waits one
   * second, and reports the page URL and title after the click.
   */
  private async clickElement(page: Page, url: string, selector: string) {
    await this.openPage(page, url)

    await page.waitForSelector(selector)
    await page.click(selector)

    // Wait for any navigation or content changes. A plain timer is used because
    // `page.waitForTimeout` is not available in current Puppeteer releases.
    await new Promise<void>(resolve => setTimeout(resolve, 1000))

    const newUrl = page.url()
    const title = await page.title()

    return {
      originalUrl: url,
      newUrl,
      title,
      clickedElement: selector,
      actionCompleted: true
    }
  }

  /**
   * Closes the shared browser, if one was launched. Safe to call repeatedly.
   */
  async cleanup() {
    const browser = this.browser
    // Drop the reference first so a failing close() does not leave a dead
    // browser behind for the next execute() call.
    this.browser = null
    if (browser) {
      await browser.close()
    }
  }
}

3. Client Integration

import { Client } from '@modelcontextprotocol/sdk/client'

const client = new Client({
  serverUrl: 'ws://localhost:3000'
})

// Example: Navigate to a webpage
const pageInfo = await client.tools.web_browser.execute({
  action: 'navigate',
  url: 'https://news.ycombinator.com'
})

console.log('Page title:', pageInfo.title)

// Example: Extract content from a JSON API endpoint.
// No selector is passed: the endpoint returns JSON rather than an HTML page,
// so a CSS class selector would match nothing and the extract action would
// throw. Without a selector the tool falls back to the text of
// <main>, <article> or <body>.
const content = await client.tools.web_browser.execute({
  action: 'extract',
  url: 'https://api.github.com/users/octocat'
})

console.log('Extracted content:', content.content)

// Example: Take a screenshot
const screenshot = await client.tools.web_browser.execute({
  action: 'screenshot',
  url: 'https://example.com'
})

console.log('Screenshot captured:', screenshot.capturedAt)

// Example: Click on an element
// NOTE(review): the URL and selector are placeholders; point them at a page
// that actually contains a submit button, otherwise the click action waits
// for the selector until the configured timeout and then fails.
const clickResult = await client.tools.web_browser.execute({
  action: 'click',
  url: 'https://example.com',
  selector: 'button[type="submit"]'
})

console.log('Clicked element:', clickResult.clickedElement)

Configuration Example

# web-browser-mcp-config.yaml
#
# Example configuration for the Web Browser MCP server.
#
# NOTE(review): the TypeScript implementation shown in this guide passes its
# settings inline and only consumes five of them (allowedDomains,
# blockedDomains, maxPageSize, timeout, headless). Nothing in that code loads
# this file, and the remaining keys below would need additional code to take
# effect -- confirm before relying on them.
server:
  name: web-browser-mcp
  version: 1.0.0
  port: 3000
  host: localhost

web_browser:
  # Domains the browser may open.
  # NOTE(review): example.com, which the client examples in this guide use,
  # is not listed here -- add it if this file is meant to drive those examples.
  allowed_domains:
    - news.ycombinator.com
    - api.github.com
    - stackoverflow.com
    - reddit.com
    - wikipedia.org
  
  # Domains that are always rejected.
  blocked_domains:
    - malicious-site.com
    - phishing-site.net
    - adult-content.com
    - gambling-site.com
  
  limits:
    max_page_size: 5242880  # 5MB
    max_concurrent_pages: 3
    timeout: 30000  # 30 seconds
    max_redirects: 5
  
  browser_settings:
    headless: true
    user_agent: "MCP-WebBrowser/1.0"
    viewport:
      width: 1920
      height: 1080
    
    security:
      disable_images: false
      disable_javascript: false
      disable_cookies: true
      block_popups: true
  
  content_filtering:
    block_ads: true
    remove_navigation: true
    extract_main_content: true
    sanitize_html: true
  
  monitoring:
    track_requests: true
    log_navigation: true
    monitor_performance: true
    alert_on_errors: true

Common Use Cases

Real-Time Information Gathering

AI models can gather current information from news sites, social media, and other web sources to provide up-to-date responses and insights.

Web Scraping & Data Extraction

AI models can extract structured data from websites, monitor changes, and collect information for analysis and reporting purposes.

Web Application Testing

AI models can interact with web applications, test user flows, and validate functionality through automated browser interactions.