Debugging AI Agents
Identify, diagnose, and resolve agent issues using the debugging tools and techniques built into AgenticAnts.
Overview
Debugging AI agents requires specialized tools and techniques to identify and resolve issues. This guide covers:
- Common Issues - Typical problems and their solutions
- Debugging Tools - Available debugging features
- Trace Analysis - Analyzing agent traces for issues
- Performance Debugging - Identifying performance bottlenecks
- Error Analysis - Understanding and resolving errors
Common Issues
1. Agent Not Responding
import os
from agenticants import AgenticAnts
ants = AgenticAnts(api_key=os.getenv('AGENTICANTS_API_KEY'))
class AgentDebugger:
def __init__(self):
self.ants = ants
def debug_agent_not_responding(self, agent_name: str):
"""Debug agent not responding issues"""
print(f"🔍 Debugging Agent: {agent_name}")
print("=" * 50)
# Check agent status
agent_status = self.ants.agents.get_status(agent_name)
print(f"Agent Status: {agent_status['status']}")
if agent_status['status'] != 'active':
print(f"❌ Agent is not active. Status: {agent_status['status']}")
return self._fix_agent_status(agent_name)
# Check recent traces
recent_traces = self.ants.traces.query({
"agent": agent_name,
"status": "error",
"period": "last_1h"
})
if recent_traces:
print(f"❌ Found {len(recent_traces)} errors in the last hour")
for trace in recent_traces[:5]: # Show last 5 errors
print(f" Error: {trace.error}")
print(f" Time: {trace.timestamp}")
print(f" Input: {trace.input[:100]}...")
else:
print("✅ No recent errors found")
# Check performance metrics
performance = self.ants.metrics.get_agent_metrics(agent_name, period="last_1h")
print(f"Average Response Time: {performance['avg_response_time']}ms")
print(f"Success Rate: {performance['success_rate']}%")
if performance['avg_response_time'] > 30000: # 30 seconds
print("⚠️ High response time detected")
return self._debug_performance_issues(agent_name)
return {"status": "healthy", "issues": []}
def _fix_agent_status(self, agent_name: str):
"""Fix agent status issues"""
print(f"🔧 Fixing agent status for {agent_name}")
# Restart agent
restart_result = self.ants.agents.restart(agent_name)
if restart_result['success']:
print("✅ Agent restarted successfully")
else:
print(f"❌ Failed to restart agent: {restart_result['error']}")
return restart_result
def _debug_performance_issues(self, agent_name: str):
"""Debug performance issues"""
print(f"🐌 Debugging performance issues for {agent_name}")
# Get performance bottlenecks
bottlenecks = self.ants.sre.find_bottlenecks(agent_name)
print("Performance Bottlenecks:")
for bottleneck in bottlenecks:
print(f" {bottleneck['operation']}: {bottleneck['avg_time']}ms")
print(f" Occurrences: {bottleneck['count']}")
print(f" Optimization: {bottleneck['optimization']}")
        return bottlenecks
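A minimal usage sketch (the agent name "support-agent" is a placeholder for one of your own agents):
agent_debugger = AgentDebugger()
report = agent_debugger.debug_agent_not_responding("support-agent")
print(report)  # {"status": "healthy", "issues": []} when no problems are found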
2. High Error Rates

import { AgenticAnts } from '@agenticants/sdk'
const ants = new AgenticAnts({ apiKey: process.env.AGENTICANTS_API_KEY })
class ErrorRateDebugger {
  private ants: AgenticAnts = ants
async debugHighErrorRates(agentName: string) {
console.log(`🔍 Debugging High Error Rates for ${agentName}`)
console.log('='.repeat(50))
// Get error metrics
const errorMetrics = await this.ants.metrics.getErrorMetrics({
agent: agentName,
period: 'last_24h'
})
console.log(`Error Rate: ${errorMetrics.errorRate}%`)
console.log(`Total Errors: ${errorMetrics.totalErrors}`)
console.log(`Success Rate: ${errorMetrics.successRate}%`)
if (errorMetrics.errorRate > 5) {
console.log('❌ High error rate detected')
// Analyze error types
const errorTypes = await this.ants.metrics.getErrorTypes({
agent: agentName,
period: 'last_24h'
})
console.log('\nError Types:')
errorTypes.forEach(errorType => {
console.log(` ${errorType.type}: ${errorType.count} occurrences`)
console.log(` Percentage: ${errorType.percentage}%`)
})
// Get recent errors
const recentErrors = await this.ants.traces.query({
agent: agentName,
status: 'error',
period: 'last_1h'
})
console.log('\nRecent Errors:')
recentErrors.slice(0, 5).forEach(error => {
console.log(` Error: ${error.error}`)
console.log(` Time: ${error.timestamp}`)
console.log(` Input: ${error.input.substring(0, 100)}...`)
console.log(' ---')
})
return this.analyzeErrorPatterns(recentErrors)
}
console.log('✅ Error rate is within acceptable range')
return { status: 'healthy', errorRate: errorMetrics.errorRate }
}
private async analyzeErrorPatterns(errors: any[]) {
console.log('🔍 Analyzing Error Patterns')
console.log('='.repeat(50))
// Group errors by type
const errorGroups = this.groupErrorsByType(errors)
console.log('Error Patterns:')
Object.entries(errorGroups).forEach(([type, errorList]) => {
console.log(`\n${type}: ${errorList.length} occurrences`)
// Find common patterns
const patterns = this.findCommonPatterns(errorList)
patterns.forEach(pattern => {
console.log(` Pattern: ${pattern.description}`)
console.log(` Frequency: ${pattern.frequency}`)
console.log(` Solution: ${pattern.solution}`)
})
})
return errorGroups
}
private groupErrorsByType(errors: any[]): Record<string, any[]> {
const groups: Record<string, any[]> = {}
errors.forEach(error => {
const type = error.errorType || 'unknown'
if (!groups[type]) {
groups[type] = []
}
groups[type].push(error)
})
return groups
}
private findCommonPatterns(errors: any[]): any[] {
// Implement pattern recognition logic
return [
{
description: 'Timeout errors',
frequency: 'high',
solution: 'Increase timeout or optimize performance'
}
]
}
}
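A usage sketch, assuming the shared `ants` client from above and a placeholder agent name:
const rateDebugger = new ErrorRateDebugger()
const result = await rateDebugger.debugHighErrorRates('support-agent')
console.log(result)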
3. Performance Issues

class PerformanceDebugger:
def __init__(self):
self.ants = ants
def debug_performance_issues(self, agent_name: str):
"""Debug performance issues"""
print(f"🐌 Debugging Performance Issues for {agent_name}")
print("=" * 50)
# Get performance metrics
performance = self.ants.metrics.get_agent_metrics(agent_name, period="last_24h")
print(f"Average Response Time: {performance['avg_response_time']}ms")
print(f"P95 Response Time: {performance['p95_response_time']}ms")
print(f"P99 Response Time: {performance['p99_response_time']}ms")
print(f"Throughput: {performance['throughput']} req/s")
# Check for performance issues
issues = []
if performance['avg_response_time'] > 5000:
issues.append("High average response time")
if performance['p95_response_time'] > 10000:
issues.append("High P95 response time")
if performance['throughput'] < 10:
issues.append("Low throughput")
if issues:
print(f"❌ Performance issues detected: {', '.join(issues)}")
return self._analyze_performance_bottlenecks(agent_name)
else:
print("✅ Performance is within acceptable range")
return {"status": "healthy", "issues": []}
def _analyze_performance_bottlenecks(self, agent_name: str):
"""Analyze performance bottlenecks"""
print("🔍 Analyzing Performance Bottlenecks")
print("=" * 50)
# Get bottleneck analysis
bottlenecks = self.ants.sre.find_bottlenecks(agent_name)
print("Bottlenecks:")
for bottleneck in bottlenecks:
print(f"\n{bottleneck['operation']}:")
print(f" Average time: {bottleneck['avg_time']}ms")
print(f" Occurrences: {bottleneck['count']}")
print(f" Total time: {bottleneck['total_time']}ms")
print(f" Optimization: {bottleneck['optimization']}")
# Get slow traces
slow_traces = self.ants.traces.query({
"agent": agent_name,
"duration": ">5000", # > 5 seconds
"period": "last_24h"
})
if slow_traces:
print(f"\nSlow Traces ({len(slow_traces)} found):")
for trace in slow_traces[:5]: # Show first 5
print(f" Duration: {trace.duration}ms")
print(f" Time: {trace.timestamp}")
print(f" Input: {trace.input[:100]}...")
print(" ---")
        return bottlenecks
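A quick sketch of running the debugger on its own (placeholder agent name):
perf_debugger = PerformanceDebugger()
perf_debugger.debug_performance_issues("support-agent")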
Debugging Tools
1. Trace Analysis
class TraceAnalyzer:
def __init__(self):
self.ants = ants
def analyze_trace(self, trace_id: str):
"""Analyze a specific trace"""
print(f"🔍 Analyzing Trace: {trace_id}")
print("=" * 50)
# Get trace details
trace = self.ants.traces.get(trace_id)
print(f"Trace Name: {trace.name}")
print(f"Status: {trace.status}")
print(f"Duration: {trace.duration}ms")
print(f"Start Time: {trace.start_time}")
print(f"End Time: {trace.end_time}")
# Analyze spans
if trace.spans:
print(f"\nSpans ({len(trace.spans)}):")
for span in trace.spans:
print(f" {span.name}: {span.duration}ms")
if span.status == "error":
print(f" ❌ Error: {span.error}")
else:
print(f" ✅ Status: {span.status}")
# Analyze metadata
if trace.metadata:
print(f"\nMetadata:")
for key, value in trace.metadata.items():
print(f" {key}: {value}")
# Check for issues
issues = self._identify_trace_issues(trace)
if issues:
print(f"\nIssues Found:")
for issue in issues:
print(f" ❌ {issue}")
else:
print(f"\n✅ No issues found")
return trace
def _identify_trace_issues(self, trace):
"""Identify issues in trace"""
issues = []
# Check duration
if trace.duration > 30000: # 30 seconds
issues.append("Long duration")
# Check status
if trace.status == "error":
issues.append("Trace failed")
# Check spans
for span in trace.spans:
if span.status == "error":
issues.append(f"Span error: {span.name}")
if span.duration > 10000: # 10 seconds
issues.append(f"Long span duration: {span.name}")
        return issues
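A short usage sketch; the trace ID is a placeholder and should come from your own trace list:
analyzer = TraceAnalyzer()
trace = analyzer.analyze_trace("trace_abc123")  # placeholder trace ID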
2. Error Analysis

class ErrorAnalyzer {
  private ants: AgenticAnts = ants
async analyzeErrors(agentName: string, period: string = 'last_24h') {
console.log(`🔍 Analyzing Errors for ${agentName}`)
console.log('='.repeat(50))
// Get error summary
const errorSummary = await this.ants.metrics.getErrorSummary({
agent: agentName,
period: period
})
console.log(`Total Errors: ${errorSummary.totalErrors}`)
console.log(`Error Rate: ${errorSummary.errorRate}%`)
console.log(`Success Rate: ${errorSummary.successRate}%`)
// Get error breakdown
const errorBreakdown = await this.ants.metrics.getErrorBreakdown({
agent: agentName,
period: period
})
console.log('\nError Breakdown:')
errorBreakdown.forEach(error => {
console.log(` ${error.type}: ${error.count} (${error.percentage}%)`)
})
// Get recent errors
const recentErrors = await this.ants.traces.query({
agent: agentName,
status: 'error',
period: 'last_1h'
})
console.log('\nRecent Errors:')
recentErrors.slice(0, 10).forEach(error => {
console.log(` ${error.timestamp}: ${error.error}`)
console.log(` Input: ${error.input.substring(0, 100)}...`)
console.log(' ---')
})
return {
summary: errorSummary,
breakdown: errorBreakdown,
recent: recentErrors
}
}
async analyzeErrorPatterns(errors: any[]) {
console.log('🔍 Analyzing Error Patterns')
console.log('='.repeat(50))
// Group errors by type
const errorGroups = this.groupErrorsByType(errors)
console.log('Error Groups:')
Object.entries(errorGroups).forEach(([type, errorList]) => {
console.log(`\n${type}: ${errorList.length} occurrences`)
// Analyze patterns
      const patterns = this.findErrorPatterns(errorList)
patterns.forEach(pattern => {
console.log(` Pattern: ${pattern.description}`)
console.log(` Frequency: ${pattern.frequency}`)
console.log(` Solution: ${pattern.solution}`)
})
})
return errorGroups
}
private groupErrorsByType(errors: any[]): Record<string, any[]> {
const groups: Record<string, any[]> = {}
errors.forEach(error => {
const type = error.errorType || 'unknown'
if (!groups[type]) {
groups[type] = []
}
groups[type].push(error)
})
return groups
}
  private findErrorPatterns(errors: any[]): any[] {
// Implement pattern analysis logic
return [
{
description: 'Timeout errors',
frequency: 'high',
solution: 'Increase timeout or optimize performance'
}
]
}
}
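A brief usage sketch with a placeholder agent name:
const errorAnalyzer = new ErrorAnalyzer()
const report = await errorAnalyzer.analyzeErrors('support-agent', 'last_24h')
await errorAnalyzer.analyzeErrorPatterns(report.recent)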
3. Performance Analysis

class PerformanceAnalyzer:
def __init__(self):
self.ants = ants
def analyze_performance(self, agent_name: str, period: str = "last_24h"):
"""Analyze performance metrics"""
print(f"📊 Performance Analysis for {agent_name}")
print("=" * 50)
# Get performance metrics
performance = self.ants.metrics.get_agent_metrics(agent_name, period=period)
print(f"Request Rate: {performance['request_rate']} req/s")
print(f"Average Response Time: {performance['avg_response_time']}ms")
print(f"P95 Response Time: {performance['p95_response_time']}ms")
print(f"P99 Response Time: {performance['p99_response_time']}ms")
print(f"Error Rate: {performance['error_rate']}%")
print(f"Success Rate: {performance['success_rate']}%")
# Analyze performance trends
trends = self.ants.metrics.get_performance_trends(agent_name, period=period)
print(f"\nPerformance Trends:")
for trend in trends[-7:]: # Last 7 data points
print(f" {trend['timestamp']}: {trend['avg_response_time']}ms avg, {trend['error_rate']}% errors")
# Identify performance issues
issues = self._identify_performance_issues(performance)
if issues:
print(f"\nPerformance Issues:")
for issue in issues:
print(f" ⚠️ {issue}")
return performance
def _identify_performance_issues(self, performance):
"""Identify performance issues"""
issues = []
if performance['avg_response_time'] > 5000:
issues.append("High average response time")
if performance['p95_response_time'] > 10000:
issues.append("High P95 response time")
if performance['error_rate'] > 5:
issues.append("High error rate")
if performance['request_rate'] < 1:
issues.append("Low request rate")
        return issues
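For example (placeholder agent name):
analyzer = PerformanceAnalyzer()
metrics = analyzer.analyze_performance("support-agent", period="last_24h")
print(f"Error rate: {metrics['error_rate']}%")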
Debugging Workflows
1. Systematic Debugging
class SystematicDebugger:
def __init__(self):
self.ants = ants
def debug_agent_systematically(self, agent_name: str):
"""Debug agent systematically"""
print(f"🔍 Systematic Debugging for {agent_name}")
print("=" * 50)
# Step 1: Check agent status
print("Step 1: Checking Agent Status")
agent_status = self.ants.agents.get_status(agent_name)
print(f"Status: {agent_status['status']}")
if agent_status['status'] != 'active':
print("❌ Agent is not active. Fixing...")
self.ants.agents.restart(agent_name)
return
# Step 2: Check recent errors
print("\nStep 2: Checking Recent Errors")
recent_errors = self.ants.traces.query({
"agent": agent_name,
"status": "error",
"period": "last_1h"
})
if recent_errors:
print(f"❌ Found {len(recent_errors)} errors in the last hour")
self._analyze_errors(recent_errors)
else:
print("✅ No recent errors found")
# Step 3: Check performance
print("\nStep 3: Checking Performance")
performance = self.ants.metrics.get_agent_metrics(agent_name, period="last_1h")
if performance['avg_response_time'] > 5000:
print("❌ High response time detected")
self._analyze_performance_issues(agent_name)
else:
print("✅ Performance is acceptable")
# Step 4: Check resource usage
print("\nStep 4: Checking Resource Usage")
resources = self.ants.sre.get_resource_usage(agent_name)
if resources['cpu_usage'] > 80:
print("❌ High CPU usage detected")
if resources['memory_usage'] > 80:
print("❌ High memory usage detected")
print("✅ Systematic debugging complete")
def _analyze_errors(self, errors):
"""Analyze errors"""
print("Error Analysis:")
for error in errors[:5]: # Show first 5 errors
print(f" Error: {error.error}")
print(f" Time: {error.timestamp}")
print(f" Input: {error.input[:100]}...")
print(" ---")
def _analyze_performance_issues(self, agent_name: str):
"""Analyze performance issues"""
print("Performance Issue Analysis:")
bottlenecks = self.ants.sre.find_bottlenecks(agent_name)
for bottleneck in bottlenecks:
print(f" {bottleneck['operation']}: {bottleneck['avg_time']}ms")
print(f" Optimization: {bottleneck['optimization']}")2. Automated Debugging
2. Automated Debugging

class AutomatedDebugger {
  private ants: AgenticAnts = ants
async setupAutomatedDebugging() {
console.log('🤖 Setting Up Automated Debugging')
console.log('='.repeat(50))
// Configure automated debugging rules
await this.ants.debugging.configureRules({
highErrorRate: {
threshold: 5, // 5% error rate
action: 'analyze_errors',
notification: true
},
highLatency: {
threshold: 5000, // 5 seconds
action: 'analyze_performance',
notification: true
},
lowThroughput: {
threshold: 1, // 1 req/s
action: 'analyze_bottlenecks',
notification: false
}
})
console.log('✅ Automated debugging configured')
}
async runAutomatedDebugging(agentName: string) {
console.log(`🤖 Running Automated Debugging for ${agentName}`)
console.log('='.repeat(50))
// Check all debugging rules
const results = await this.ants.debugging.checkRules(agentName)
    for (const result of results) {
console.log(`\nRule: ${result.rule}`)
console.log(`Status: ${result.status}`)
if (result.triggered) {
console.log(`❌ Rule triggered: ${result.message}`)
console.log(`Action: ${result.action}`)
// Execute action
        await this.executeDebuggingAction(result.action, agentName)
} else {
console.log(`✅ Rule not triggered`)
}
    }
return results
}
private async executeDebuggingAction(action: string, agentName: string) {
console.log(`Executing action: ${action}`)
switch (action) {
case 'analyze_errors':
await this.analyzeErrors(agentName)
break
case 'analyze_performance':
await this.analyzePerformance(agentName)
break
case 'analyze_bottlenecks':
await this.analyzeBottlenecks(agentName)
break
}
}
private async analyzeErrors(agentName: string) {
console.log('Analyzing errors...')
// Implement error analysis
}
private async analyzePerformance(agentName: string) {
console.log('Analyzing performance...')
// Implement performance analysis
}
private async analyzeBottlenecks(agentName: string) {
console.log('Analyzing bottlenecks...')
// Implement bottleneck analysis
}
}
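A usage sketch, assuming the rules only need to be configured once:
const autoDebugger = new AutomatedDebugger()
await autoDebugger.setupAutomatedDebugging()  // one-time rule configuration
const results = await autoDebugger.runAutomatedDebugging('support-agent')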
Best Practices
1. Debugging Checklist
class DebuggingChecklist:
def __init__(self):
self.ants = ants
def run_debugging_checklist(self, agent_name: str):
"""Run comprehensive debugging checklist"""
print(f"✅ Debugging Checklist for {agent_name}")
print("=" * 50)
checklist = [
("Agent Status", self.check_agent_status),
("Recent Errors", self.check_recent_errors),
("Performance Metrics", self.check_performance_metrics),
("Resource Usage", self.check_resource_usage),
("Configuration", self.check_configuration),
("Dependencies", self.check_dependencies)
]
results = {}
for item, check_function in checklist:
try:
result = check_function(agent_name)
results[item] = result
status = "✅ PASS" if result['passed'] else "❌ FAIL"
print(f"{item}: {status}")
if not result['passed']:
print(f" Issues: {result['issues']}")
except Exception as e:
results[item] = {"passed": False, "issues": [str(e)]}
print(f"{item}: ❌ ERROR - {e}")
return results
def check_agent_status(self, agent_name: str):
"""Check agent status"""
status = self.ants.agents.get_status(agent_name)
return {"passed": status['status'] == 'active', "issues": []}
def check_recent_errors(self, agent_name: str):
"""Check recent errors"""
errors = self.ants.traces.query({
"agent": agent_name,
"status": "error",
"period": "last_1h"
})
return {"passed": len(errors) == 0, "issues": [f"{len(errors)} errors found"]}
def check_performance_metrics(self, agent_name: str):
"""Check performance metrics"""
performance = self.ants.metrics.get_agent_metrics(agent_name, period="last_1h")
issues = []
if performance['avg_response_time'] > 5000:
issues.append("High response time")
if performance['error_rate'] > 5:
issues.append("High error rate")
return {"passed": len(issues) == 0, "issues": issues}
def check_resource_usage(self, agent_name: str):
"""Check resource usage"""
resources = self.ants.sre.get_resource_usage(agent_name)
issues = []
if resources['cpu_usage'] > 80:
issues.append("High CPU usage")
if resources['memory_usage'] > 80:
issues.append("High memory usage")
return {"passed": len(issues) == 0, "issues": issues}
def check_configuration(self, agent_name: str):
"""Check configuration"""
config = self.ants.agents.get_configuration(agent_name)
return {"passed": config['valid'], "issues": []}
def check_dependencies(self, agent_name: str):
"""Check dependencies"""
dependencies = self.ants.agents.get_dependencies(agent_name)
issues = []
for dep in dependencies:
if not dep['healthy']:
issues.append(f"Unhealthy dependency: {dep['name']}")
return {"passed": len(issues) == 0, "issues": issues}2. Debugging Strategy
2. Debugging Strategy

class DebuggingStrategy {
  private ants: AgenticAnts = ants
async implementDebuggingStrategy(agentName: string) {
console.log(`🎯 Implementing Debugging Strategy for ${agentName}`)
console.log('='.repeat(50))
// Phase 1: Initial Assessment
console.log('\nPhase 1: Initial Assessment')
const assessment = await this.performInitialAssessment(agentName)
// Phase 2: Issue Identification
console.log('\nPhase 2: Issue Identification')
const issues = await this.identifyIssues(agentName)
// Phase 3: Root Cause Analysis
console.log('\nPhase 3: Root Cause Analysis')
const rootCauses = await this.analyzeRootCauses(issues)
// Phase 4: Solution Implementation
console.log('\nPhase 4: Solution Implementation')
const solutions = await this.implementSolutions(rootCauses)
// Phase 5: Verification
console.log('\nPhase 5: Verification')
const verification = await this.verifySolutions(agentName)
return {
assessment,
issues,
rootCauses,
solutions,
verification
}
}
private async performInitialAssessment(agentName: string) {
console.log('Performing initial assessment...')
// Implement initial assessment
return { status: 'completed' }
}
private async identifyIssues(agentName: string) {
console.log('Identifying issues...')
// Implement issue identification
return { issues: [] }
}
private async analyzeRootCauses(issues: any[]) {
console.log('Analyzing root causes...')
// Implement root cause analysis
return { rootCauses: [] }
}
private async implementSolutions(rootCauses: any[]) {
console.log('Implementing solutions...')
// Implement solution implementation
return { solutions: [] }
}
private async verifySolutions(agentName: string) {
console.log('Verifying solutions...')
// Implement solution verification
return { verified: true }
}
}
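A sketch of running the full strategy (placeholder agent name):
const strategy = new DebuggingStrategy()
const { issues, verification } = await strategy.implementDebuggingStrategy('support-agent')
console.log(issues, verification)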
Troubleshooting
Common Debugging Issues
Issue: Agent not responding
def debug_agent_not_responding():
# Check agent status
status = ants.agents.get_status("agent_name")
print(f"Agent status: {status}")
# Check recent traces
traces = ants.traces.query({
"agent": "agent_name",
"period": "last_1h"
})
print(f"Recent traces: {len(traces)}")
# Check performance
performance = ants.metrics.get_agent_metrics("agent_name")
print(f"Performance: {performance}")Issue: High error rates
async function debugHighErrorRates() {
const errors = await ants.traces.query({
agent: 'agent_name',
status: 'error',
period: 'last_24h'
})
console.log(`Total errors: ${errors.length}`)
// Group by error type
const errorGroups = groupErrorsByType(errors)
console.log('Error groups:', errorGroups)
}

Next Steps
- Production - Learn production best practices
- Cost Optimization - Reduce costs with our optimization guide
- SRE - Deep dive into SRE capabilities
Example Projects
- Debugging Customer Support Bot - GitHub Repository
- Performance Debugging - GitHub Repository
- Error Analysis System - GitHub Repository
Congratulations! 🎉 You now have comprehensive debugging tools and techniques to effectively troubleshoot and resolve AI agent issues.