Files
our-claude-skills/custom-skills/00-claude-code-setting/code/scripts/analyze_tokens.py
Andrew Yim b69e4b6f3a refactor: Reorganize skill numbering and update documentation
Skill Numbering Changes:
- 01-03: OurDigital core (was 30-32)
- 31-32: Notion tools (was 01-02)
- 99_archive: Renamed from _archive for sorting

New Files:
- AGENTS.md: Claude Code agent routing guide
- requirements.txt for 00-claude-code-setting, 32-notion-writer, 43-jamie-youtube-manager

Documentation Updates:
- CLAUDE.md: Updated skill inventory (23 skills)
- AUDIT_REPORT.md: Current completion status (91%)
- Archived REFACTORING_PLAN.md (most tasks complete)

Removed:
- ga-agent-skills/ (moved to separate repo ~/Project/dintel-ga4-agent)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 18:42:39 +07:00

259 lines
8.2 KiB
Python

#!/usr/bin/env python3
"""
Token Usage Analyzer
Analyzes MCP servers and CLAUDE.md for token efficiency.
"""
import json
import sys
from pathlib import Path
# Approximate context cost, in tokens, of each known MCP server.
# Server names are matched by substring against these keys and the first
# match in insertion order wins, so the ordering below is significant.
MCP_TOKEN_ESTIMATES = {
    "playwright": 13_500,
    "puppeteer": 13_500,
    "notion": 5_000,
    "github": 18_000,
    "postgres": 8_000,
    "postgresql": 8_000,
    "bigquery": 10_000,
    "firecrawl": 6_000,
    "zapier": 25_000,
    "slack": 8_000,
    "linear": 6_000,
    "memory": 3_000,
    "filesystem": 4_000,
    "brave-search": 3_000,
    "fetch": 2_000,
    "sequential-thinking": 2_000,
    "chrome-devtools": 8_000,
    "dtm-agent": 5_000,
}

# Recommended load strategy per known server: "always", "lazy" or "disable".
# Matched by substring in insertion order, like the estimates above.
LOAD_STRATEGIES = {
    "playwright": "always",
    "puppeteer": "always",
    "notion": "always",
    "github": "lazy",
    "postgres": "lazy",
    "postgresql": "lazy",
    "bigquery": "lazy",
    "firecrawl": "lazy",
    "zapier": "disable",
    "slack": "lazy",
    "linear": "lazy",
    "memory": "lazy",
    "filesystem": "always",
    "chrome-devtools": "always",
}

# Multiplier turning a whitespace-separated word count into a token estimate.
TOKENS_PER_WORD = 1.3

# CLAUDE.md size thresholds: above the line count is a warning, above the
# token count is a critical finding.
MAX_CLAUDE_MD_LINES = 200
MAX_CLAUDE_MD_TOKENS = 3_000
class TokenAnalyzer:
    """Estimate the baseline context cost of MCP servers and CLAUDE.md files.

    Results accumulate on the instance:

    * ``findings`` - messages grouped under the ``critical``, ``warnings``,
      ``passing`` and ``recommendations`` keys.
    * ``mcp_servers`` - server name -> record with ``tokens``,
      ``has_instructions``, ``strategy`` and ``source``.
    * ``claude_md_files`` - one record per CLAUDE.md file that was read.
    * ``mcp_tokens`` / ``claude_md_tokens`` - running token totals.
    """

    # Context size, in tokens, that the baseline percentage is measured against.
    CONTEXT_WINDOW_TOKENS = 200_000

    def __init__(self):
        self.findings = {
            "critical": [],
            "warnings": [],
            "passing": [],
            "recommendations": [],
        }
        self.mcp_servers = {}
        self.claude_md_files = []
        self.mcp_tokens = 0
        self.claude_md_tokens = 0

    @staticmethod
    def _existing_unique(candidates) -> list:
        """Return the candidates that exist, dropping repeats of the same file.

        Two candidates can name the same file (for example when the current
        directory is the home directory); analysing it twice would double
        every total and finding, so only the first occurrence is kept.
        """
        found = []
        seen = set()
        for candidate in candidates:
            if not candidate.exists():
                continue
            resolved = candidate.resolve()
            if resolved in seen:
                continue
            seen.add(resolved)
            found.append(candidate)
        return found

    def find_settings_files(self) -> list:
        """Find MCP settings files in the user and current directories."""
        return self._existing_unique([
            Path.home() / ".claude" / "settings.json",
            Path.cwd() / ".claude" / "settings.json",
            Path.cwd() / ".mcp.json",
        ])

    def find_claude_md_files(self) -> list:
        """Find CLAUDE.md files in the user and current directories."""
        return self._existing_unique([
            Path.home() / ".claude" / "CLAUDE.md",
            Path.cwd() / "CLAUDE.md",
            Path.cwd() / ".claude" / "CLAUDE.md",
        ])

    def estimate_server_tokens(self, name: str) -> int:
        """Estimate tokens for a server.

        The first ``MCP_TOKEN_ESTIMATES`` key contained in the lower-cased
        *name* decides the estimate; unknown servers get 5000.
        """
        name_lower = name.lower()
        for key, tokens in MCP_TOKEN_ESTIMATES.items():
            if key in name_lower:
                return tokens
        return 5000  # Default estimate

    def get_load_strategy(self, name: str, config: dict = None) -> str:
        """Get load strategy - checks actual config first, then recommendations.

        A config with ``autoStart`` explicitly set to ``False`` is reported as
        ``"lazy"``. Otherwise the first ``LOAD_STRATEGIES`` key contained in
        the lower-cased *name* is used, falling back to ``"lazy"``.
        """
        # Check actual autoStart setting in config
        if config and config.get("autoStart") is False:
            return "lazy"
        name_lower = name.lower()
        for key, strategy in LOAD_STRATEGIES.items():
            if key in name_lower:
                return strategy
        return "lazy"  # Default to lazy for unknown

    def analyze_mcp_servers(self) -> None:
        """Analyze MCP server configurations.

        Reads ``mcpServers`` from every settings file, merges them by server
        name (a later file overrides an earlier one), then records totals and
        findings once per server. Unreadable or malformed files produce a
        warning instead of an exception.
        """
        settings_files = self.find_settings_files()
        if not settings_files:
            self.findings["warnings"].append("No MCP settings files found")
            return

        # Pass 1: merge definitions so a server declared in several files is
        # only counted once.
        merged = {}
        for settings_path in settings_files:
            try:
                with open(settings_path, encoding="utf-8") as f:
                    settings = json.load(f)
            except (ValueError, OSError) as e:
                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                self.findings["warnings"].append(f"Could not parse {settings_path}: {e}")
                continue

            if not isinstance(settings, dict):
                self.findings["warnings"].append(
                    f"Could not parse {settings_path}: top-level JSON value is not an object"
                )
                continue

            servers = settings.get("mcpServers")
            if servers is None:
                continue
            if not isinstance(servers, dict):
                self.findings["warnings"].append(
                    f"Could not parse {settings_path}: 'mcpServers' is not an object"
                )
                continue

            for name, config in servers.items():
                if not isinstance(config, dict):
                    continue
                merged[name] = {
                    "tokens": self.estimate_server_tokens(name),
                    "has_instructions": "serverInstructions" in config,
                    "strategy": self.get_load_strategy(name, config),
                    "source": str(settings_path),
                }

        # Pass 2: totals and findings from the merged view.
        for name, info in merged.items():
            self.mcp_servers[name] = info
            tokens = info["tokens"]
            strategy = info["strategy"]

            # Only count "always" servers for baseline
            if strategy == "always":
                self.mcp_tokens += tokens

            if not info["has_instructions"]:
                self.findings["critical"].append(
                    f"MCP '{name}': Missing serverInstructions (breaks Tool Search)"
                )
            else:
                self.findings["passing"].append(f"MCP '{name}': Has serverInstructions")

            if tokens > 15000 and strategy == "always":
                self.findings["warnings"].append(
                    f"MCP '{name}': Heavy server (~{tokens:,} tokens), consider lazy loading"
                )

    def analyze_claude_md(self) -> None:
        """Analyze CLAUDE.md files.

        For each file, records line/word/token counts, adds the token count to
        ``claude_md_tokens`` and emits size, whitespace and redundancy findings.
        """
        files = self.find_claude_md_files()
        if not files:
            self.findings["warnings"].append("No CLAUDE.md files found")
            return

        for path in files:
            try:
                content = path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as e:
                self.findings["warnings"].append(f"Could not read {path}: {e}")
                continue

            # splitlines() does not count a phantom line after a trailing newline.
            lines = len(content.splitlines())
            words = len(content.split())
            tokens = int(words * TOKENS_PER_WORD)
            self.claude_md_files.append({
                "path": str(path),
                "lines": lines,
                "words": words,
                "tokens": tokens,
            })
            self.claude_md_tokens += tokens

            # Generate findings
            if tokens > MAX_CLAUDE_MD_TOKENS:
                self.findings["critical"].append(
                    f"CLAUDE.md ({path.name}): ~{tokens:,} tokens exceeds {MAX_CLAUDE_MD_TOKENS:,} limit"
                )
            elif lines > MAX_CLAUDE_MD_LINES:
                self.findings["warnings"].append(
                    f"CLAUDE.md ({path.name}): {lines} lines exceeds {MAX_CLAUDE_MD_LINES} recommended"
                )
            else:
                self.findings["passing"].append(
                    f"CLAUDE.md ({path.name}): {lines} lines, ~{tokens:,} tokens - Good"
                )

            # Check structure
            if '\n\n\n' in content:
                self.findings["warnings"].append(
                    f"CLAUDE.md ({path.name}): Contains excessive whitespace"
                )

            # Check for common redundancy
            content_lower = content.lower()
            if "you are claude" in content_lower or "you are an ai" in content_lower:
                self.findings["recommendations"].append(
                    f"CLAUDE.md ({path.name}): Remove self-descriptions Claude already knows"
                )

    def analyze(self) -> dict:
        """Run full analysis.

        Returns:
            A JSON-serialisable report with the token totals, the per-server
            and per-file records, the baseline usage percentage relative to
            ``CONTEXT_WINDOW_TOKENS`` and the collected findings.
        """
        self.analyze_mcp_servers()
        self.analyze_claude_md()

        total_tokens = self.mcp_tokens + self.claude_md_tokens
        usage_pct = (total_tokens / self.CONTEXT_WINDOW_TOKENS) * 100

        # Overall recommendations
        if usage_pct > 30:
            self.findings["critical"].append(
                f"Baseline uses {usage_pct:.1f}% of context - target is under 30%"
            )
        elif usage_pct > 20:
            self.findings["warnings"].append(
                f"Baseline uses {usage_pct:.1f}% of context - consider optimization"
            )

        missing_instructions = sum(
            1 for s in self.mcp_servers.values() if not s.get("has_instructions")
        )
        if missing_instructions > 0:
            self.findings["recommendations"].append(
                f"Add serverInstructions to {missing_instructions} MCP server(s) for Tool Search"
            )

        return {
            "total_tokens": total_tokens,
            "mcp_tokens": self.mcp_tokens,
            "claude_md_tokens": self.claude_md_tokens,
            "mcp_count": len(self.mcp_servers),
            "mcp_servers": self.mcp_servers,
            "claude_md_files": self.claude_md_files,
            "usage_percentage": round(usage_pct, 1),
            "findings": self.findings,
        }
def main():
    """Run the analysis, print the report as indented JSON, and return 0."""
    report = TokenAnalyzer().analyze()
    rendered = json.dumps(report, indent=2)
    print(rendered)
    return 0
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())