our-claude-skills/custom-skills/00-claude-code-setting/code/scripts/analyze_tokens.py

#!/usr/bin/env python3
"""
Token Usage Analyzer
Analyzes MCP servers and CLAUDE.md for token efficiency.
"""

import json
import sys
from pathlib import Path

# Known MCP servers as (name key, estimated tokens, recommended load strategy).
# A strategy of None means the server has no explicit recommendation.
# Row order matters: lookups elsewhere in this file take the first key that
# appears as a substring of the configured server name.
_SERVER_PROFILES = (
    ("playwright", 13500, "always"),
    ("puppeteer", 13500, "always"),
    ("notion", 5000, "always"),
    ("github", 18000, "lazy"),
    ("postgres", 8000, "lazy"),
    ("postgresql", 8000, "lazy"),
    ("bigquery", 10000, "lazy"),
    ("firecrawl", 6000, "lazy"),
    ("zapier", 25000, "disable"),
    ("slack", 8000, "lazy"),
    ("linear", 6000, "lazy"),
    ("memory", 3000, "lazy"),
    ("filesystem", 4000, "always"),
    ("brave-search", 3000, None),
    ("fetch", 2000, None),
    ("sequential-thinking", 2000, None),
    ("chrome-devtools", 8000, "always"),
    ("dtm-agent", 5000, None),
)

# Token estimates for known MCP servers
MCP_TOKEN_ESTIMATES = {
    server: estimate for server, estimate, _ in _SERVER_PROFILES
}

# Load strategy recommendations
LOAD_STRATEGIES = {
    server: strategy
    for server, _, strategy in _SERVER_PROFILES
    if strategy is not None
}

# Rough words-to-tokens conversion factor used for CLAUDE.md estimates.
TOKENS_PER_WORD = 1.3
# Size thresholds applied to each CLAUDE.md file.
MAX_CLAUDE_MD_LINES = 200
MAX_CLAUDE_MD_TOKENS = 3000


class TokenAnalyzer:
    """Estimate baseline context usage from MCP servers and CLAUDE.md files.

    State accumulated on the instance:

    * ``findings`` - message lists keyed by severity ("critical",
      "warnings", "passing", "recommendations").
    * ``mcp_servers`` - per-server details keyed by server name.
    * ``claude_md_files`` - per-file line/word/token counts.
    * ``mcp_tokens`` / ``claude_md_tokens`` - baseline token totals.
    """

    # Context window size (in tokens) that baseline usage is measured against.
    CONTEXT_WINDOW_TOKENS = 200000

    def __init__(self):
        self._reset()

    def _reset(self):
        """Clear all accumulated state so an analysis starts from scratch."""
        self.findings = {
            "critical": [],
            "warnings": [],
            "passing": [],
            "recommendations": []
        }
        self.mcp_servers = {}
        self.claude_md_files = []
        self.mcp_tokens = 0
        self.claude_md_tokens = 0

    @staticmethod
    def _existing_unique(candidates) -> list:
        """Return the candidates that exist, dropping repeats of the same file.

        The home and working directories can coincide, in which case two
        candidate paths point at one file; it must only be analyzed once.
        """
        unique = []
        seen = set()
        for path in candidates:
            if not path.exists():
                continue
            resolved = path.resolve()
            if resolved not in seen:
                seen.add(resolved)
                unique.append(path)
        return unique

    def find_settings_files(self) -> list:
        """Find MCP settings files.

        Returns:
            Existing settings paths from the home and current directories,
            in lookup order, without duplicates.
        """
        locations = [
            Path.home() / ".claude" / "settings.json",
            Path.cwd() / ".claude" / "settings.json",
            Path.cwd() / ".mcp.json",
        ]
        return self._existing_unique(locations)

    def find_claude_md_files(self) -> list:
        """Find CLAUDE.md files.

        Returns:
            Existing CLAUDE.md paths from the home and current directories,
            in lookup order, without duplicates.
        """
        locations = [
            Path.home() / ".claude" / "CLAUDE.md",
            Path.cwd() / "CLAUDE.md",
            Path.cwd() / ".claude" / "CLAUDE.md",
        ]
        return self._existing_unique(locations)

    def estimate_server_tokens(self, name: str) -> int:
        """Estimate tokens for a server.

        The first MCP_TOKEN_ESTIMATES key contained in the lowercased server
        name decides the estimate; unknown servers get a 5000-token default.
        """
        name_lower = name.lower()
        for key, tokens in MCP_TOKEN_ESTIMATES.items():
            if key in name_lower:
                return tokens
        return 5000  # Default estimate

    def get_load_strategy(self, name: str, config: dict = None) -> str:
        """Get load strategy - checks actual config first, then recommendations.

        An explicit ``"autoStart": false`` in the server's config yields
        "lazy". Otherwise the first LOAD_STRATEGIES key contained in the
        lowercased server name decides; unknown servers default to "lazy".
        """
        # Check actual autoStart setting in config
        if config and config.get("autoStart") is False:
            return "lazy"

        name_lower = name.lower()
        for key, strategy in LOAD_STRATEGIES.items():
            if key in name_lower:
                return strategy
        return "lazy"  # Default to lazy for unknown

    def _load_server_configs(self, settings_files) -> dict:
        """Merge ``mcpServers`` entries from all settings files.

        Returns a mapping of server name to ``(config, settings_path)``.
        When a name appears in several files the later file wins, so each
        server is evaluated exactly once. Unreadable or malformed files are
        reported as warnings and skipped.
        """
        merged = {}
        for settings_path in settings_files:
            try:
                with open(settings_path, encoding="utf-8") as f:
                    settings = json.load(f)
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            except (ValueError, OSError) as e:
                self.findings["warnings"].append(f"Could not parse {settings_path}: {e}")
                continue

            if not isinstance(settings, dict):
                self.findings["warnings"].append(
                    f"Could not parse {settings_path}: top-level JSON value is not an object"
                )
                continue

            servers = settings.get("mcpServers")
            if servers is None:
                continue  # No servers configured in this file
            if not isinstance(servers, dict):
                self.findings["warnings"].append(
                    f"Could not parse {settings_path}: 'mcpServers' is not an object"
                )
                continue

            for name, config in servers.items():
                if isinstance(config, dict):
                    merged[name] = (config, settings_path)
        return merged

    def analyze_mcp_servers(self):
        """Analyze MCP server configurations.

        Populates ``mcp_servers``, adds the estimate of every "always"
        server to ``mcp_tokens`` and records findings about missing
        ``serverInstructions`` and heavy always-loaded servers.
        """
        settings_files = self.find_settings_files()

        if not settings_files:
            self.findings["warnings"].append("No MCP settings files found")
            return

        merged = self._load_server_configs(settings_files)

        for name, (config, settings_path) in merged.items():
            tokens = self.estimate_server_tokens(name)
            has_instructions = "serverInstructions" in config
            strategy = self.get_load_strategy(name, config)

            self.mcp_servers[name] = {
                "tokens": tokens,
                "has_instructions": has_instructions,
                "strategy": strategy,
                "source": str(settings_path)
            }

            # Only count "always" servers for baseline
            if strategy == "always":
                self.mcp_tokens += tokens

            # Generate findings
            if not has_instructions:
                self.findings["critical"].append(
                    f"MCP '{name}': Missing serverInstructions (breaks Tool Search)"
                )
            else:
                self.findings["passing"].append(f"MCP '{name}': Has serverInstructions")

            if tokens > 15000 and strategy == "always":
                self.findings["warnings"].append(
                    f"MCP '{name}': Heavy server (~{tokens:,} tokens), consider lazy loading"
                )

    def analyze_claude_md(self):
        """Analyze CLAUDE.md files.

        Records line/word/token counts per file, adds the token estimates to
        ``claude_md_tokens`` and records findings about size, excessive
        blank lines and redundant self-descriptions.
        """
        files = self.find_claude_md_files()

        if not files:
            self.findings["warnings"].append("No CLAUDE.md files found")
            return

        for path in files:
            try:
                content = path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as e:
                self.findings["warnings"].append(f"Could not read {path}: {e}")
                continue

            # splitlines() does not count a phantom empty line after a
            # trailing newline, unlike split('\n').
            lines = len(content.splitlines())
            words = len(content.split())
            tokens = int(words * TOKENS_PER_WORD)

            self.claude_md_files.append({
                "path": str(path),
                "lines": lines,
                "words": words,
                "tokens": tokens
            })

            self.claude_md_tokens += tokens

            # Generate findings
            if tokens > MAX_CLAUDE_MD_TOKENS:
                self.findings["critical"].append(
                    f"CLAUDE.md ({path.name}): ~{tokens:,} tokens exceeds {MAX_CLAUDE_MD_TOKENS:,} limit"
                )
            elif lines > MAX_CLAUDE_MD_LINES:
                self.findings["warnings"].append(
                    f"CLAUDE.md ({path.name}): {lines} lines exceeds {MAX_CLAUDE_MD_LINES} recommended"
                )
            else:
                self.findings["passing"].append(
                    f"CLAUDE.md ({path.name}): {lines} lines, ~{tokens:,} tokens - Good"
                )

            # Check structure
            if '\n\n\n' in content:
                self.findings["warnings"].append(
                    f"CLAUDE.md ({path.name}): Contains excessive whitespace"
                )

            # Check for common redundancy
            content_lower = content.lower()
            if "you are claude" in content_lower or "you are an ai" in content_lower:
                self.findings["recommendations"].append(
                    f"CLAUDE.md ({path.name}): Remove self-descriptions Claude already knows"
                )

    def analyze(self) -> dict:
        """Run full analysis.

        State is reset first, so calling this repeatedly yields the same
        report instead of accumulating totals and findings.

        Returns:
            A JSON-serializable report with token totals, per-server and
            per-file details, the baseline usage percentage and findings.
        """
        self._reset()
        self.analyze_mcp_servers()
        self.analyze_claude_md()

        total_tokens = self.mcp_tokens + self.claude_md_tokens
        usage_pct = (total_tokens / self.CONTEXT_WINDOW_TOKENS) * 100

        # Overall recommendations
        if usage_pct > 30:
            self.findings["critical"].append(
                f"Baseline uses {usage_pct:.1f}% of context - target is under 30%"
            )
        elif usage_pct > 20:
            self.findings["warnings"].append(
                f"Baseline uses {usage_pct:.1f}% of context - consider optimization"
            )

        missing_instructions = sum(
            1 for s in self.mcp_servers.values() if not s.get("has_instructions")
        )
        if missing_instructions > 0:
            self.findings["recommendations"].append(
                f"Add serverInstructions to {missing_instructions} MCP server(s) for Tool Search"
            )

        return {
            "total_tokens": total_tokens,
            "mcp_tokens": self.mcp_tokens,
            "claude_md_tokens": self.claude_md_tokens,
            "mcp_count": len(self.mcp_servers),
            "mcp_servers": self.mcp_servers,
            "claude_md_files": self.claude_md_files,
            "usage_percentage": round(usage_pct, 1),
            "findings": self.findings
        }


def main():
    """Run the analyzer and print its report to stdout as indented JSON.

    Returns:
        0, used as the process exit status.
    """
    rendered = json.dumps(TokenAnalyzer().analyze(), indent=2)
    print(rendered)
    return 0


# Script entry point: exit with whatever status main() returns.
if __name__ == "__main__":
    raise SystemExit(main())