Performance MCP
Monitor and optimize AI model performance with metrics and caching strategies
Overview
The Performance MCP gives AI models tools to monitor their own performance, apply caching strategies, and manage resource usage while keeping operational costs under control.
It is intended for models that need to track performance metrics, cache results intelligently, and tune their behavior based on real-time data.
Implementation Guide
1. Performance MCP Server
import { Server } from '@modelcontextprotocol/sdk/server'
import { PerformanceTool } from './tools/PerformanceTool'
import { CacheManager } from './cache/CacheManager'
import { MetricsCollector } from './metrics/MetricsCollector'
const server = new Server({
name: 'performance-mcp',
version: '1.0.0'
})
// Initialize performance components
const cacheManager = new CacheManager({
redisUrl: process.env.REDIS_URL,
defaultTTL: 3600,
maxSize: 100 * 1024 * 1024 // 100MB
})
const metricsCollector = new MetricsCollector({
prometheusEnabled: true,
customMetrics: true,
alerting: true
})
// Register performance tool
server.tool(new PerformanceTool({
cacheManager,
metricsCollector,
performanceThresholds: {
responseTime: 1000, // 1 second
memoryUsage: 0.8, // 80%
cpuUsage: 0.7 // 70%
}
}))
server.listen({
port: 3000,
host: 'localhost'
})
2. Performance Tool Implementation
import { Tool } from '@modelcontextprotocol/sdk/server'
import { CacheManager } from '../cache/CacheManager'
import { MetricsCollector } from '../metrics/MetricsCollector'
interface PerformanceConfig {
cacheManager: CacheManager
metricsCollector: MetricsCollector
performanceThresholds: {
responseTime: number
memoryUsage: number
cpuUsage: number
}
}
export class PerformanceTool extends Tool {
private config: PerformanceConfig
constructor(config: PerformanceConfig) {
super({
name: 'performance',
description: 'Monitor and optimize AI model performance',
inputSchema: {
type: 'object',
properties: {
operation: { type: 'string', enum: ['cache_get', 'cache_set', 'metrics_record', 'performance_check'] },
key: { type: 'string' },
// 'value' is a string for cache_set and a number for metrics_record
value: { type: ['string', 'number'] },
ttl: { type: 'number' },
metric: { type: 'string' }
},
required: ['operation']
}
})
this.config = config
}
async execute(input: any) {
// Use a local start time so concurrent executions do not clobber each other
const startTime = Date.now()
try {
let result: any
switch (input.operation) {
case 'cache_get':
result = await this.cacheGet(input.key)
break
case 'cache_set':
result = await this.cacheSet(input.key, input.value, input.ttl)
break
case 'metrics_record':
result = await this.recordMetric(input.metric, input.value)
break
case 'performance_check':
result = await this.performanceCheck()
break
default:
throw new Error(`Unknown operation: ${input.operation}`)
}
// Record operation performance
const duration = Date.now() - startTime
await this.recordMetric('operation_duration', duration)
return result
} catch (error) {
// Record error metrics
await this.recordMetric('operation_errors', 1)
throw error
}
}
private async cacheGet(key: string) {
const startTime = Date.now()
const value = await this.config.cacheManager.get(key)
const duration = Date.now() - startTime
await this.recordMetric('cache_get_duration', duration)
await this.recordMetric('cache_hit', value ? 1 : 0)
return {
value,
cached: !!value,
duration,
timestamp: new Date().toISOString()
}
}
private async cacheSet(key: string, value: string, ttl?: number) {
const startTime = Date.now()
await this.config.cacheManager.set(key, value, ttl)
const duration = Date.now() - startTime
await this.recordMetric('cache_set_duration', duration)
await this.recordMetric('cache_set', 1)
return {
success: true,
key,
duration,
timestamp: new Date().toISOString()
}
}
private async recordMetric(metric: string, value: number) {
await this.config.metricsCollector.record(metric, value)
return {
metric,
value,
recorded: true,
timestamp: new Date().toISOString()
}
}
private async performanceCheck() {
const memoryUsage = process.memoryUsage()
const cpuUsage = process.cpuUsage() // cumulative user/system CPU time in microseconds since process start
const metrics = {
memory: {
rss: memoryUsage.rss,
heapUsed: memoryUsage.heapUsed,
heapTotal: memoryUsage.heapTotal,
external: memoryUsage.external
},
cpu: {
user: cpuUsage.user,
system: cpuUsage.system
},
uptime: process.uptime(),
timestamp: new Date().toISOString()
}
// Check thresholds
const alerts = []
if (memoryUsage.heapUsed / memoryUsage.heapTotal > this.config.performanceThresholds.memoryUsage) {
alerts.push('High memory usage detected')
}
// Record performance metrics
await this.recordMetric('memory_usage', memoryUsage.heapUsed / memoryUsage.heapTotal)
await this.recordMetric('uptime', process.uptime())
return {
metrics,
alerts,
thresholds: this.config.performanceThresholds
}
}
}
3. Cache Manager Implementation
import Redis from 'ioredis'
export class CacheManager {
private redis: Redis
private config: {
defaultTTL: number
maxSize: number
}
constructor(config: { redisUrl: string; defaultTTL: number; maxSize: number }) {
this.redis = new Redis(config.redisUrl)
this.config = config
}
async get(key: string): Promise<string | null> {
try {
const value = await this.redis.get(key)
if (value) {
// Sliding expiration: refresh the TTL on every cache hit
await this.redis.expire(key, this.config.defaultTTL)
}
return value
} catch (error) {
console.error('Cache get error:', error)
return null
}
}
async set(key: string, value: string, ttl?: number): Promise<void> {
try {
const finalTTL = ttl || this.config.defaultTTL
await this.redis.setex(key, finalTTL, value)
// Check cache size and evict if necessary
await this.checkAndEvict()
} catch (error) {
console.error('Cache set error:', error)
}
}
async delete(key: string): Promise<void> {
try {
await this.redis.del(key)
} catch (error) {
console.error('Cache delete error:', error)
}
}
async clear(): Promise<void> {
try {
await this.redis.flushdb()
} catch (error) {
console.error('Cache clear error:', error)
}
}
private async checkAndEvict(): Promise<void> {
try {
// NOTE: KEYS is O(N) and blocks Redis; fine for a demo, prefer SCAN in production
const keys = await this.redis.keys('*')
if (keys.length > 1000) { // Simple eviction strategy
const keysToEvict = keys.slice(0, 100) // evict an arbitrary batch of 100 keys
await this.redis.del(...keysToEvict)
}
} catch (error) {
console.error('Cache eviction error:', error)
}
}
async getStats(): Promise<any> {
try {
const info = await this.redis.info()
const keys = await this.redis.dbsize()
return {
keys,
info: info.split('\r\n').reduce((acc: any, line: string) => {
const [key, value] = line.split(':')
if (key && value) acc[key] = value
return acc
}, {})
}
} catch (error) {
console.error('Cache stats error:', error)
return { keys: 0, info: {} }
}
}
}
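The MetricsCollector referenced in sections 1 and 2 is not implemented in this guide. A minimal in-memory sketch of the interface the PerformanceTool assumes could look like the following (the prometheusEnabled, customMetrics, and alerting options are accepted but not wired up here):
interface MetricsCollectorConfig {
  prometheusEnabled: boolean
  customMetrics: boolean
  alerting: boolean
}

export class MetricsCollector {
  private config: MetricsCollectorConfig
  // Raw samples per metric name, kept in memory for simplicity
  private samples: Map<string, number[]> = new Map()

  constructor(config: MetricsCollectorConfig) {
    this.config = config
  }

  // Matches the record(metric, value) call used by PerformanceTool
  async record(metric: string, value: number): Promise<void> {
    const values = this.samples.get(metric) ?? []
    values.push(value)
    this.samples.set(metric, values)
  }

  // Summarize a metric for dashboards or ad-hoc inspection
  summarize(metric: string): { count: number; sum: number; avg: number } {
    const values = this.samples.get(metric) ?? []
    const sum = values.reduce((acc, v) => acc + v, 0)
    return { count: values.length, sum, avg: values.length ? sum / values.length : 0 }
  }
}
In production the record() call would more likely forward to a Prometheus client or push gateway than an in-memory map; the important part is that it exposes the async record(metric, value) method the tool calls.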
4. Client Integration
import { Client } from '@modelcontextprotocol/sdk/client'
const client = new Client({
serverUrl: 'ws://localhost:3000'
})
// Example: Cache a response
const cacheResult = await client.tools.performance.execute({
operation: 'cache_set',
key: 'user_profile_123',
value: JSON.stringify({ name: 'John Doe', email: 'john@example.com' }),
ttl: 3600 // 1 hour
})
console.log('Cached result:', cacheResult)
// Example: Retrieve from cache
const cachedData = await client.tools.performance.execute({
operation: 'cache_get',
key: 'user_profile_123'
})
if (cachedData.cached) {
console.log('Retrieved from cache:', cachedData.value)
} else {
console.log('Cache miss, fetching from database...')
}
// Example: Record custom metrics
await client.tools.performance.execute({
operation: 'metrics_record',
metric: 'api_calls_total',
value: 1
})
// Example: Check system performance
const performance = await client.tools.performance.execute({
operation: 'performance_check'
})
console.log('Memory usage:', performance.metrics.memory)
console.log('Alerts:', performance.alerts)
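The cache_get/cache_set pair above combines naturally into a cache-aside helper. The sketch below assumes the client from the previous examples and a caller-supplied loader function (the loader shown is purely illustrative):
// Cache-aside wrapper: try the cache first, fall back to the loader, then populate the cache
async function getOrCompute(key: string, loader: () => Promise<string>, ttl = 3600): Promise<string> {
  const cached = await client.tools.performance.execute({ operation: 'cache_get', key })
  if (cached.cached) {
    return cached.value
  }
  const fresh = await loader() // e.g. a database lookup on cache miss
  await client.tools.performance.execute({ operation: 'cache_set', key, value: fresh, ttl })
  return fresh
}

const profile = await getOrCompute('user_profile_123', async () =>
  JSON.stringify({ name: 'John Doe', email: 'john@example.com' })
)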
Configuration Example
# performance-mcp-config.yaml
server:
name: performance-mcp
version: 1.0.0
port: 3000
host: localhost
performance:
cache:
redis_url: ${REDIS_URL}
default_ttl: 3600 # 1 hour
max_size: 104857600 # 100MB
eviction_policy: "lru"
compression: true
encryption: false
strategies:
- name: "user_profiles"
ttl: 7200 # 2 hours
max_size: 10485760 # 10MB
compression: true
- name: "api_responses"
ttl: 300 # 5 minutes
max_size: 52428800 # 50MB
compression: false
metrics:
prometheus:
enabled: true
port: 9090
path: "/metrics"
custom_metrics:
- operation_duration
- cache_hit_rate
- memory_usage
- cpu_usage
- error_rate
thresholds:
response_time: 1000 # 1 second
memory_usage: 0.8 # 80%
cpu_usage: 0.7 # 70%
error_rate: 0.05 # 5%
alerting:
enabled: true
webhook_url: ${ALERT_WEBHOOK_URL}
channels:
- slack
- email
- pagerduty
optimization:
auto_scaling:
enabled: true
min_instances: 1
max_instances: 10
scale_up_threshold: 0.7
scale_down_threshold: 0.3
resource_management:
memory_limit: 1073741824 # 1GB
cpu_limit: 1.0
enable_gc: true
gc_threshold: 0.8
caching_strategies:
predictive_caching: true
cache_warming: true
intelligent_ttl: true
cache_invalidation: "lazy"
monitoring:
health_checks:
enabled: true
interval: 30 # seconds
timeout: 5 # seconds
retries: 3
logging:
level: "info"
format: "json"
retention: "30d"
structured: true
tracing:
enabled: true
sampler: 0.1
jaeger_endpoint: "http://jaeger:14268/api/traces"
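One way to load this file and resolve the ${...} environment references is a small helper. This sketch assumes js-yaml is installed and that the file is saved as ./performance-mcp-config.yaml:
import { readFileSync } from 'fs'
import yaml from 'js-yaml'

// Replace ${VAR} placeholders with values from process.env before parsing the YAML
function loadConfig(path: string): any {
  const raw = readFileSync(path, 'utf8')
  const resolved = raw.replace(/\$\{(\w+)\}/g, (_, name) => process.env[name] ?? '')
  return yaml.load(resolved)
}

const config = loadConfig('./performance-mcp-config.yaml')
console.log(config.performance.cache.default_ttl) // 3600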
Common Use Cases
Response Optimization
AI models can cache frequently requested responses, implement intelligent TTL strategies, and optimize response times based on usage patterns and performance metrics.
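For example, a caller could lengthen the TTL for frequently requested keys and let rarely used ones expire quickly. This sketch assumes the client from the integration example and keeps a simple in-process hit counter:
const hitCounts = new Map<string, number>()

// Hot keys get a longer TTL, rarely used keys expire quickly
function adaptiveTTL(key: string): number {
  const hits = (hitCounts.get(key) ?? 0) + 1
  hitCounts.set(key, hits)
  return hits > 10 ? 7200 : 300
}

await client.tools.performance.execute({
  operation: 'cache_set',
  key: 'api_response_weather',
  value: JSON.stringify({ tempC: 21 }),
  ttl: adaptiveTTL('api_response_weather')
})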
Resource Management
AI models can monitor their own resource usage, implement auto-scaling strategies, and optimize memory and CPU usage based on real-time performance data.
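A simple self-monitoring loop could periodically run performance_check and react to alerts. The sketch below assumes the same client and just records the incident so downstream alerting can pick it up:
// Poll performance every 30 seconds and react to threshold alerts
setInterval(async () => {
  const report = await client.tools.performance.execute({ operation: 'performance_check' })
  if (report.alerts.length > 0) {
    console.warn('Performance alerts:', report.alerts)
    // Record the breach as a custom metric for dashboards and alerting
    await client.tools.performance.execute({ operation: 'metrics_record', metric: 'threshold_breaches', value: 1 })
  }
}, 30_000)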
Performance Monitoring
AI models can track their own performance metrics, identify bottlenecks, and implement self-optimization strategies based on historical and real-time data.
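For instance, any expensive operation can be wrapped so its duration and failures are recorded as custom metrics. A sketch assuming the same client (the wrapper and metric names are illustrative):
// Times an operation and records duration/error metrics around it
async function withMetrics<T>(name: string, fn: () => Promise<T>): Promise<T> {
  const start = Date.now()
  try {
    return await fn()
  } catch (err) {
    await client.tools.performance.execute({ operation: 'metrics_record', metric: `${name}_errors`, value: 1 })
    throw err
  } finally {
    await client.tools.performance.execute({
      operation: 'metrics_record',
      metric: `${name}_duration_ms`,
      value: Date.now() - start
    })
  }
}

const result = await withMetrics('embedding_lookup', async () => '...')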