
Performance MCP

Monitor and optimize AI model performance with metrics and caching strategies

Performance · Advanced · MCP Protocol

Overview

The Performance MCP provides AI models with tools to monitor their own performance, implement caching strategies, and optimize resource usage while maintaining operational efficiency and cost control.

This protocol is essential for AI models that need to track performance metrics, implement intelligent caching, and optimize their operations based on real-time data.

Implementation Guide

1. Performance MCP Server

import { Server } from '@modelcontextprotocol/sdk/server'
import { PerformanceTool } from './tools/PerformanceTool'
import { CacheManager } from './cache/CacheManager'
import { MetricsCollector } from './metrics/MetricsCollector'

const server = new Server({
  name: 'performance-mcp',
  version: '1.0.0'
})

// Initialize performance components
const cacheManager = new CacheManager({
  redisUrl: process.env.REDIS_URL,
  defaultTTL: 3600,
  maxSize: 100 * 1024 * 1024 // 100MB
})

const metricsCollector = new MetricsCollector({
  prometheusEnabled: true,
  customMetrics: true,
  alerting: true
})

// Register performance tool
server.tool(new PerformanceTool({
  cacheManager,
  metricsCollector,
  performanceThresholds: {
    responseTime: 1000, // 1 second
    memoryUsage: 0.8,  // 80%
    cpuUsage: 0.7      // 70%
  }
}))

server.listen({
  port: 3000,
  host: 'localhost'
})

2. Performance Tool Implementation

import { Tool } from '@modelcontextprotocol/sdk/server'
import { CacheManager } from '../cache/CacheManager'
import { MetricsCollector } from '../metrics/MetricsCollector'

interface PerformanceConfig {
  cacheManager: CacheManager
  metricsCollector: MetricsCollector
  performanceThresholds: {
    responseTime: number
    memoryUsage: number
    cpuUsage: number
  }
}

export class PerformanceTool extends Tool {
  private config: PerformanceConfig

  constructor(config: PerformanceConfig) {
    super({
      name: 'performance',
      description: 'Monitor and optimize AI model performance',
      inputSchema: {
        type: 'object',
        properties: {
          operation: { type: 'string', enum: ['cache_get', 'cache_set', 'metrics_record', 'performance_check'] },
          key: { type: 'string' },
          // Holds the cached payload (string) for cache_set or the metric value (number) for metrics_record
          value: { type: ['string', 'number'] },
          ttl: { type: 'number' },
          metric: { type: 'string' }
        },
        required: ['operation']
      }
    })
    this.config = config
  }

  async execute(input: any) {
    // Track duration locally so concurrent executions don't share state
    const startTime = Date.now()

    try {
      let result: any

      switch (input.operation) {
        case 'cache_get':
          result = await this.cacheGet(input.key)
          break
        case 'cache_set':
          result = await this.cacheSet(input.key, input.value, input.ttl)
          break
        case 'metrics_record':
          result = await this.recordMetric(input.metric, input.value)
          break
        case 'performance_check':
          result = await this.performanceCheck()
          break
        default:
          throw new Error(`Unknown operation: ${input.operation}`)
      }

      // Record how long the operation took
      const duration = Date.now() - startTime
      await this.recordMetric('operation_duration', duration)

      return result
    } catch (error) {
      // Record error metrics before rethrowing
      await this.recordMetric('operation_errors', 1)
      throw error
    }
  }

  private async cacheGet(key: string) {
    const startTime = Date.now()
    const value = await this.config.cacheManager.get(key)
    const duration = Date.now() - startTime
    
    await this.recordMetric('cache_get_duration', duration)
    await this.recordMetric('cache_hit', value ? 1 : 0)
    
    return {
      value,
      cached: !!value,
      duration,
      timestamp: new Date().toISOString()
    }
  }

  private async cacheSet(key: string, value: string, ttl?: number) {
    const startTime = Date.now()
    await this.config.cacheManager.set(key, value, ttl)
    const duration = Date.now() - startTime
    
    await this.recordMetric('cache_set_duration', duration)
    await this.recordMetric('cache_set', 1)
    
    return {
      success: true,
      key,
      duration,
      timestamp: new Date().toISOString()
    }
  }

  private async recordMetric(metric: string, value: number) {
    await this.config.metricsCollector.record(metric, value)
    
    return {
      metric,
      value,
      recorded: true,
      timestamp: new Date().toISOString()
    }
  }

  private async performanceCheck() {
    const memoryUsage = process.memoryUsage()
    const cpuUsage = process.cpuUsage()
    
    const metrics = {
      memory: {
        rss: memoryUsage.rss,
        heapUsed: memoryUsage.heapUsed,
        heapTotal: memoryUsage.heapTotal,
        external: memoryUsage.external
      },
      cpu: {
        user: cpuUsage.user,
        system: cpuUsage.system
      },
      uptime: process.uptime(),
      timestamp: new Date().toISOString()
    }
    
    // Check thresholds
    const alerts = []
    if (memoryUsage.heapUsed / memoryUsage.heapTotal > this.config.performanceThresholds.memoryUsage) {
      alerts.push('High memory usage detected')
    }
    
    // Record performance metrics
    await this.recordMetric('memory_usage', memoryUsage.heapUsed / memoryUsage.heapTotal)
    await this.recordMetric('uptime', process.uptime())
    
    return {
      metrics,
      alerts,
      thresholds: this.config.performanceThresholds
    }
  }
}

3. Cache Manager Implementation

import Redis from 'ioredis'

export class CacheManager {
  private redis: Redis
  private config: {
    defaultTTL: number
    maxSize: number
  }
  
  constructor(config: { redisUrl: string; defaultTTL: number; maxSize: number }) {
    this.redis = new Redis(config.redisUrl)
    this.config = config
  }
  
  async get(key: string): Promise<string | null> {
    try {
      const value = await this.redis.get(key)
      if (value) {
        // Sliding expiration: refresh the TTL on every cache hit
        await this.redis.expire(key, this.config.defaultTTL)
      }
      return value
    } catch (error) {
      console.error('Cache get error:', error)
      return null
    }
  }
  
  async set(key: string, value: string, ttl?: number): Promise<void> {
    try {
      const finalTTL = ttl || this.config.defaultTTL
      await this.redis.setex(key, finalTTL, value)
      
      // Check cache size and evict if necessary
      await this.checkAndEvict()
    } catch (error) {
      console.error('Cache set error:', error)
    }
  }
  
  async delete(key: string): Promise<void> {
    try {
      await this.redis.del(key)
    } catch (error) {
      console.error('Cache delete error:', error)
    }
  }
  
  async clear(): Promise<void> {
    try {
      await this.redis.flushdb()
    } catch (error) {
      console.error('Cache clear error:', error)
    }
  }
  
  // Placeholder eviction: trims by key count rather than the configured maxSize;
  // KEYS is O(n), so production deployments should rely on Redis maxmemory policies instead
  private async checkAndEvict(): Promise<void> {
    try {
      const keys = await this.redis.keys('*')
      if (keys.length > 1000) { // Simple eviction strategy
        const evictKeys = keys.slice(0, 100)
        await this.redis.del(...evictKeys)
      }
    } catch (error) {
      console.error('Cache eviction error:', error)
    }
  }
  
  async getStats(): Promise<any> {
    try {
      const info = await this.redis.info()
      const keys = await this.redis.dbsize()
      
      return {
        keys,
        info: info.split('\r\n').reduce((acc: any, line: string) => {
          const [key, value] = line.split(':')
          if (key && value) acc[key] = value
          return acc
        }, {})
      }
    } catch (error) {
      console.error('Cache stats error:', error)
      return { keys: 0, info: {} }
    }
  }
}
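
The MetricsCollector imported by the server and the tool is not shown in this guide. A minimal in-memory sketch follows; the record(metric, value) signature matches how the PerformanceTool calls it, but the constructor options and the summary helper are assumptions, and a production version would typically export to Prometheus (for example via prom-client) rather than keep samples in memory.

interface MetricsConfig {
  prometheusEnabled: boolean
  customMetrics: boolean
  alerting: boolean
}

export class MetricsCollector {
  private config: MetricsConfig
  // In-memory store: metric name -> recorded samples
  private samples = new Map<string, number[]>()

  constructor(config: MetricsConfig) {
    this.config = config
  }

  // Matches the record(metric, value) calls made by PerformanceTool
  async record(metric: string, value: number): Promise<void> {
    const values = this.samples.get(metric) ?? []
    values.push(value)
    this.samples.set(metric, values)
  }

  // Simple aggregate view for dashboards or a /metrics endpoint
  summary(metric: string) {
    const values = this.samples.get(metric) ?? []
    const sum = values.reduce((acc, v) => acc + v, 0)
    return {
      count: values.length,
      sum,
      avg: values.length ? sum / values.length : 0,
      max: values.length ? Math.max(...values) : 0
    }
  }
}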

4. Client Integration

import { Client } from '@modelcontextprotocol/sdk/client'

const client = new Client({
  serverUrl: 'ws://localhost:3000'
})

// Example: Cache a response
const cacheResult = await client.tools.performance.execute({
  operation: 'cache_set',
  key: 'user_profile_123',
  value: JSON.stringify({ name: 'John Doe', email: 'john@example.com' }),
  ttl: 3600 // 1 hour
})

console.log('Cached result:', cacheResult)

// Example: Retrieve from cache
const cachedData = await client.tools.performance.execute({
  operation: 'cache_get',
  key: 'user_profile_123'
})

if (cachedData.cached) {
  console.log('Retrieved from cache:', cachedData.value)
} else {
  console.log('Cache miss, fetching from database...')
}

// Example: Record custom metrics
await client.tools.performance.execute({
  operation: 'metrics_record',
  metric: 'api_calls_total',
  value: 1
})

// Example: Check system performance
const performance = await client.tools.performance.execute({
  operation: 'performance_check'
})

console.log('Memory usage:', performance.metrics.memory)
console.log('Alerts:', performance.alerts)

Configuration Example

# performance-mcp-config.yaml
server:
  name: performance-mcp
  version: 1.0.0
  port: 3000
  host: localhost

performance:
  cache:
    redis_url: ${REDIS_URL}
    default_ttl: 3600  # 1 hour
    max_size: 104857600  # 100MB
    eviction_policy: "lru"
    compression: true
    encryption: false
    
    strategies:
      - name: "user_profiles"
        ttl: 7200  # 2 hours
        max_size: 10485760  # 10MB
        compression: true
      
      - name: "api_responses"
        ttl: 300  # 5 minutes
        max_size: 52428800  # 50MB
        compression: false
  
  metrics:
    prometheus:
      enabled: true
      port: 9090
      path: "/metrics"
    
    custom_metrics:
      - operation_duration
      - cache_hit_rate
      - memory_usage
      - cpu_usage
      - error_rate
    
    thresholds:
      response_time: 1000  # 1 second
      memory_usage: 0.8   # 80%
      cpu_usage: 0.7      # 70%
      error_rate: 0.05    # 5%
    
    alerting:
      enabled: true
      webhook_url: ${ALERT_WEBHOOK_URL}
      channels:
        - slack
        - email
        - pagerduty
  
  optimization:
    auto_scaling:
      enabled: true
      min_instances: 1
      max_instances: 10
      scale_up_threshold: 0.7
      scale_down_threshold: 0.3
    
    resource_management:
      memory_limit: 1073741824  # 1GB
      cpu_limit: 1.0
      enable_gc: true
      gc_threshold: 0.8
    
    caching_strategies:
      predictive_caching: true
      cache_warming: true
      intelligent_ttl: true
      cache_invalidation: "lazy"
  
  monitoring:
    health_checks:
      enabled: true
      interval: 30  # seconds
      timeout: 5    # seconds
      retries: 3
    
    logging:
      level: "info"
      format: "json"
      retention: "30d"
      structured: true
    
    tracing:
      enabled: true
      sampler: 0.1
      jaeger_endpoint: "http://jaeger:14268/api/traces"
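
One way to load this file at startup is sketched below; it assumes the js-yaml package and expands ${VAR} placeholders from the environment before parsing, which is an implementation choice rather than something the config format requires.

import { readFileSync } from 'fs'
import { load } from 'js-yaml'

// Read the config, substitute ${VAR} placeholders from the environment, then parse
const raw = readFileSync('performance-mcp-config.yaml', 'utf8')
const expanded = raw.replace(/\$\{(\w+)\}/g, (_, name) => process.env[name] ?? '')
const config = load(expanded) as any

console.log(config.performance.cache.default_ttl) // 3600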

Common Use Cases

Response Optimization

AI models can cache frequently requested responses, implement intelligent TTL strategies, and optimize response times based on usage patterns and performance metrics.
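
A sketch of this cache-aside pattern using the client from section 4 is below; the fetchUserProfileFromDatabase helper is a hypothetical stand-in for the real data source.

// Hypothetical source-of-truth lookup used on a cache miss
async function fetchUserProfileFromDatabase(userId: string): Promise<{ name: string; email: string }> {
  return { name: 'John Doe', email: 'john@example.com' }
}

// Cache-aside: check the cache first, fall back to the database, then repopulate
async function getUserProfile(userId: string) {
  const key = `user_profile_${userId}`

  const cached = await client.tools.performance.execute({ operation: 'cache_get', key })
  if (cached.cached) {
    return JSON.parse(cached.value)
  }

  const profile = await fetchUserProfileFromDatabase(userId)

  await client.tools.performance.execute({
    operation: 'cache_set',
    key,
    value: JSON.stringify(profile),
    ttl: 7200 // aligns with the user_profiles strategy in the config above
  })

  return profile
}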

Resource Management

AI models can monitor their own resource usage, implement auto-scaling strategies, and optimize memory and CPU usage based on real-time performance data.
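
For example, a runtime could poll the performance_check operation on the same 30-second interval as the health checks in the config and react to any alerts; the reactions listed in the comment are illustrative rather than part of the protocol.

// Poll performance_check periodically and react to threshold alerts
setInterval(async () => {
  const check = await client.tools.performance.execute({
    operation: 'performance_check'
  })

  if (check.alerts.length > 0) {
    console.warn('Performance alerts:', check.alerts)
    // Possible reactions: request another instance, shed low-priority work,
    // or clear non-critical cache entries to relieve memory pressure
  }
}, 30_000)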

Performance Monitoring

AI models can track their own performance metrics, identify bottlenecks, and implement self-optimization strategies based on historical and real-time data.
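
One simple way to do this with the tool above is to wrap each operation so its duration and failures are recorded automatically; the timed helper below is an illustrative pattern, not part of the protocol.

// Wrap any async operation so its duration and failure count feed the metrics pipeline
async function timed<T>(name: string, fn: () => Promise<T>): Promise<T> {
  const start = Date.now()
  try {
    return await fn()
  } catch (error) {
    await client.tools.performance.execute({
      operation: 'metrics_record',
      metric: `${name}_errors`,
      value: 1
    })
    throw error
  } finally {
    await client.tools.performance.execute({
      operation: 'metrics_record',
      metric: `${name}_duration`,
      value: Date.now() - start
    })
  }
}

// Usage (hypothetical): const result = await timed('vector_search', () => runVectorSearch(query))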