Files
our-claude-skills/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/full_audit.py
Andrew Yim 9426787ba6 feat(seo-audit): Add comprehensive SEO audit skill
Add ourdigital-seo-audit skill with:
- Full site audit orchestrator (full_audit.py)
- Google Search Console and PageSpeed API clients
- Schema.org JSON-LD validation and generation
- XML sitemap and robots.txt validation
- Notion database integration for findings export
- Core Web Vitals measurement and analysis
- 7 schema templates (article, faq, product, etc.)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 02:30:02 +09:00

498 lines
18 KiB
Python

"""
Full SEO Audit - Orchestration Script
=====================================
Purpose: Run comprehensive SEO audit combining all tools
Python: 3.10+
Usage:
python full_audit.py --url https://example.com --output notion --notion-page-id abc123
"""
import argparse
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
from urllib.parse import urlparse
from robots_checker import RobotsChecker
from schema_validator import SchemaValidator
from sitemap_validator import SitemapValidator
from pagespeed_client import PageSpeedClient
from notion_reporter import NotionReporter, SEOFinding
# Module-wide logging: timestamped, level-tagged messages for audit progress.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
@dataclass
class AuditResult:
    """Aggregated outcome of a full SEO audit run.

    Carries the raw per-tool result dicts (robots, sitemap, schema,
    performance), the normalized findings list, and a computed summary.
    """

    url: str
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    robots: dict = field(default_factory=dict)
    sitemap: dict = field(default_factory=dict)
    schema: dict = field(default_factory=dict)
    performance: dict = field(default_factory=dict)
    findings: list[SEOFinding] = field(default_factory=list)
    summary: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a plain dict; findings are reduced to a count."""
        payload = {
            name: getattr(self, name)
            for name in (
                "url", "timestamp", "robots", "sitemap",
                "schema", "performance", "summary",
            )
        }
        payload["findings_count"] = len(self.findings)
        return payload
class SEOAuditor:
    """Orchestrate a comprehensive SEO audit.

    Combines robots.txt analysis, sitemap validation, structured-data
    (schema) validation and PageSpeed performance analysis into a single
    AuditResult; can export normalized findings to Notion or render a
    plain-text report.
    """

    # Severity rank for sorting findings in the text report (lower = more
    # severe). Fixes the previous alphabetical sort, which ordered
    # Critical, High, Low, Medium.
    _PRIORITY_ORDER = {"Critical": 0, "High": 1, "Medium": 2, "Low": 3}

    def __init__(self):
        self.robots_checker = RobotsChecker()
        self.sitemap_validator = SitemapValidator()
        self.schema_validator = SchemaValidator()
        self.pagespeed_client = PageSpeedClient()

    def run_audit(
        self,
        url: str,
        include_robots: bool = True,
        include_sitemap: bool = True,
        include_schema: bool = True,
        include_performance: bool = True,
    ) -> AuditResult:
        """
        Run comprehensive SEO audit.

        Each stage is independently fault-tolerant: a failure is logged and
        recorded as {"error": ...} on the result instead of aborting the run.

        Args:
            url: URL to audit
            include_robots: Check robots.txt
            include_sitemap: Validate sitemap
            include_schema: Validate schema markup
            include_performance: Run PageSpeed analysis

        Returns:
            AuditResult with per-tool results, findings and a summary.
        """
        result = AuditResult(url=url)
        parsed_url = urlparse(url)
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        logger.info(f"Starting SEO audit for {url}")

        # 1. Robots.txt analysis
        if include_robots:
            logger.info("Analyzing robots.txt...")
            try:
                robots_result = self.robots_checker.analyze(base_url)
                result.robots = robots_result.to_dict()
                self._process_robots_findings(robots_result, result)
            except Exception as e:
                logger.error(f"Robots.txt analysis failed: {e}")
                result.robots = {"error": str(e)}

        # 2. Sitemap validation
        if include_sitemap:
            logger.info("Validating sitemap...")
            sitemap_url = f"{base_url}/sitemap.xml"
            # Prefer the first sitemap URL declared in robots.txt, if any.
            if result.robots.get("sitemaps"):
                sitemap_url = result.robots["sitemaps"][0]
            try:
                sitemap_result = self.sitemap_validator.validate(sitemap_url)
                result.sitemap = sitemap_result.to_dict()
                self._process_sitemap_findings(sitemap_result, result)
            except Exception as e:
                logger.error(f"Sitemap validation failed: {e}")
                result.sitemap = {"error": str(e)}

        # 3. Schema validation
        if include_schema:
            logger.info("Validating schema markup...")
            try:
                schema_result = self.schema_validator.validate(url=url)
                result.schema = schema_result.to_dict()
                self._process_schema_findings(schema_result, result)
            except Exception as e:
                logger.error(f"Schema validation failed: {e}")
                result.schema = {"error": str(e)}

        # 4. PageSpeed analysis (mobile strategy, matching Google's
        # mobile-first indexing emphasis)
        if include_performance:
            logger.info("Running PageSpeed analysis...")
            try:
                perf_result = self.pagespeed_client.analyze(url, strategy="mobile")
                result.performance = perf_result.to_dict()
                self._process_performance_findings(perf_result, result)
            except Exception as e:
                logger.error(f"PageSpeed analysis failed: {e}")
                result.performance = {"error": str(e)}

        # Generate summary
        result.summary = self._generate_summary(result)
        logger.info(f"Audit complete. Found {len(result.findings)} issues.")
        return result

    def _process_robots_findings(self, robots_result, audit_result: AuditResult):
        """Convert robots.txt issues to findings (error->Critical, warning->High)."""
        for issue in robots_result.issues:
            priority = "Medium"
            if issue.severity == "error":
                priority = "Critical"
            elif issue.severity == "warning":
                priority = "High"
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Robots.txt",
                priority=priority,
                description=issue.directive or "",
                recommendation=issue.suggestion or "",
            ))

    def _process_sitemap_findings(self, sitemap_result, audit_result: AuditResult):
        """Convert sitemap issues to findings (error->High, warning->Medium)."""
        for issue in sitemap_result.issues:
            priority = "Medium"
            if issue.severity == "error":
                priority = "High"
            elif issue.severity == "warning":
                priority = "Medium"
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Sitemap",
                priority=priority,
                url=issue.url,
                recommendation=issue.suggestion or "",
            ))

    def _process_schema_findings(self, schema_result, audit_result: AuditResult):
        """Convert schema issues to findings (error->High, warning->Medium, else Low)."""
        for issue in schema_result.issues:
            priority = "Low"
            if issue.severity == "error":
                priority = "High"
            elif issue.severity == "warning":
                priority = "Medium"
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Schema/Structured Data",
                priority=priority,
                description=f"Schema type: {issue.schema_type}" if issue.schema_type else "",
                recommendation=issue.suggestion or "",
            ))

    def _process_performance_findings(self, perf_result, audit_result: AuditResult):
        """Convert Core Web Vitals / PageSpeed results to findings.

        Thresholds follow the published CWV targets: LCP < 2.5s,
        CLS < 0.1, FID < 100ms.
        """
        cwv = perf_result.core_web_vitals

        # Check Core Web Vitals
        if cwv.lcp_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor LCP: {cwv.lcp / 1000:.2f}s (should be < 2.5s)",
                category="Performance",
                priority="Critical",
                impact="Users experience slow page loads, affecting bounce rate and rankings",
                recommendation="Optimize images, reduce server response time, use CDN",
            ))
        elif cwv.lcp_rating == "NEEDS_IMPROVEMENT":
            audit_result.findings.append(SEOFinding(
                issue=f"LCP needs improvement: {cwv.lcp / 1000:.2f}s (target < 2.5s)",
                category="Performance",
                priority="High",
                recommendation="Optimize largest content element loading",
            ))
        if cwv.cls_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor CLS: {cwv.cls:.3f} (should be < 0.1)",
                category="Performance",
                priority="High",
                impact="Layout shifts frustrate users",
                recommendation="Set dimensions for images/embeds, avoid inserting content above existing content",
            ))
        if cwv.fid_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor FID/TBT: {cwv.fid:.0f}ms (should be < 100ms)",
                category="Performance",
                priority="High",
                impact="Slow interactivity affects user experience",
                recommendation="Reduce JavaScript execution time, break up long tasks",
            ))

        # Check performance score.
        # BUG FIX: was `if perf_result.performance_score and ... < 50`,
        # which silently skipped a legitimate score of 0 (falsy).
        if perf_result.performance_score is not None and perf_result.performance_score < 50:
            audit_result.findings.append(SEOFinding(
                issue=f"Low performance score: {perf_result.performance_score:.0f}/100",
                category="Performance",
                priority="High",
                impact="Poor performance affects user experience and SEO",
                recommendation="Address top opportunities from PageSpeed Insights",
            ))

        # Add top opportunities as findings
        for opp in perf_result.opportunities[:3]:
            if opp["savings_ms"] > 500:  # Only significant savings
                audit_result.findings.append(SEOFinding(
                    issue=opp["title"],
                    category="Performance",
                    priority="Medium",
                    description=opp.get("description", ""),
                    impact=f"Potential savings: {opp['savings_ms'] / 1000:.1f}s",
                    recommendation="See PageSpeed Insights for details",
                ))

    def _generate_summary(self, result: AuditResult) -> dict:
        """Generate audit summary: counts by priority/category, status flags,
        up to 5 quick wins (Medium/Low) and all Critical issues."""
        findings_by_priority = {}
        findings_by_category = {}
        for finding in result.findings:
            # Count by priority
            findings_by_priority[finding.priority] = (
                findings_by_priority.get(finding.priority, 0) + 1
            )
            # Count by category
            findings_by_category[finding.category] = (
                findings_by_category.get(finding.category, 0) + 1
            )
        return {
            "total_findings": len(result.findings),
            "findings_by_priority": findings_by_priority,
            "findings_by_category": findings_by_category,
            "robots_accessible": result.robots.get("accessible", False),
            "sitemap_valid": result.sitemap.get("valid", False),
            "schema_valid": result.schema.get("valid", False),
            "performance_score": result.performance.get("scores", {}).get("performance"),
            "quick_wins": [
                f.issue for f in result.findings
                if f.priority in ("Medium", "Low")
            ][:5],
            "critical_issues": [
                f.issue for f in result.findings
                if f.priority == "Critical"
            ],
        }

    def export_to_notion(
        self,
        result: AuditResult,
        parent_page_id: str | None = None,
        use_default_db: bool = True,
    ) -> dict:
        """
        Export audit results to Notion.

        Args:
            result: AuditResult object
            parent_page_id: Parent page ID (for creating new database)
            use_default_db: If True, use OurDigital SEO Audit Log database

        Returns:
            Dict with database_id, audit_id, findings_created — plus
            summary_page_id only when a new database is created
            (use_default_db=False).
        """
        reporter = NotionReporter()
        audit_id = f"{urlparse(result.url).netloc}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"

        # Tag every finding with the site and this run's audit id so rows
        # can be traced back to one audit.
        for finding in result.findings:
            finding.site = result.url
            finding.audit_id = audit_id

        if use_default_db:
            # Use the default OurDigital SEO Audit Log database
            page_ids = reporter.add_findings_batch(result.findings)
            return {
                # NOTE(review): hard-coded fallback database id; should live
                # on NotionReporter as DEFAULT_DATABASE_ID — confirm.
                "database_id": reporter.DEFAULT_DATABASE_ID if hasattr(reporter, 'DEFAULT_DATABASE_ID') else "2c8581e5-8a1e-8035-880b-e38cefc2f3ef",
                "audit_id": audit_id,
                "findings_created": len(page_ids),
            }
        else:
            # Create new database under parent page
            if not parent_page_id:
                raise ValueError("parent_page_id required when not using default database")
            db_title = f"SEO Audit - {urlparse(result.url).netloc} - {datetime.now().strftime('%Y-%m-%d')}"
            database_id = reporter.create_findings_database(parent_page_id, db_title)
            page_ids = reporter.add_findings_batch(result.findings, database_id)
            # Create summary page
            summary_page_id = reporter.create_audit_summary_page(
                parent_page_id,
                result.url,
                result.summary,
            )
            return {
                "database_id": database_id,
                "summary_page_id": summary_page_id,
                "audit_id": audit_id,
                "findings_created": len(page_ids),
            }

    def generate_report(self, result: AuditResult) -> str:
        """Generate a human-readable plain-text report from an AuditResult."""
        lines = [
            "=" * 70,
            "SEO AUDIT REPORT",
            "=" * 70,
            f"URL: {result.url}",
            f"Date: {result.timestamp}",
            "",
            "-" * 70,
            "SUMMARY",
            "-" * 70,
            f"Total Issues Found: {result.summary.get('total_findings', 0)}",
            "",
        ]

        # Priority breakdown (fixed display order, most severe first)
        lines.append("Issues by Priority:")
        for priority in ["Critical", "High", "Medium", "Low"]:
            count = result.summary.get("findings_by_priority", {}).get(priority, 0)
            if count:
                lines.append(f" {priority}: {count}")
        lines.append("")

        # Category breakdown
        lines.append("Issues by Category:")
        for category, count in result.summary.get("findings_by_category", {}).items():
            lines.append(f" {category}: {count}")
        lines.append("")
        lines.append("-" * 70)
        lines.append("STATUS OVERVIEW")
        lines.append("-" * 70)

        # Status checks
        lines.append(f"Robots.txt: {'✓ Accessible' if result.robots.get('accessible') else '✗ Not accessible'}")
        lines.append(f"Sitemap: {'✓ Valid' if result.sitemap.get('valid') else '✗ Issues found'}")
        lines.append(f"Schema: {'✓ Valid' if result.schema.get('valid') else '✗ Issues found'}")
        perf_score = result.performance.get("scores", {}).get("performance")
        # BUG FIX: was `if perf_score:` — a score of 0 (worst possible) was
        # falsy and silently dropped from the report.
        if perf_score is not None:
            status = "✓ Good" if perf_score >= 90 else "⚠ Needs work" if perf_score >= 50 else "✗ Poor"
            lines.append(f"Performance: {status} ({perf_score:.0f}/100)")

        # Critical issues
        critical = result.summary.get("critical_issues", [])
        if critical:
            lines.extend([
                "",
                "-" * 70,
                "CRITICAL ISSUES (Fix Immediately)",
                "-" * 70,
            ])
            for issue in critical:
                lines.append(f"{issue}")

        # Quick wins
        quick_wins = result.summary.get("quick_wins", [])
        if quick_wins:
            lines.extend([
                "",
                "-" * 70,
                "QUICK WINS",
                "-" * 70,
            ])
            for issue in quick_wins[:5]:
                lines.append(f"{issue}")

        # All findings, grouped by category then sorted by severity.
        # BUG FIX: the previous key sorted priority strings alphabetically
        # (Critical, High, Low, Medium); use the explicit severity rank.
        if result.findings:
            lines.extend([
                "",
                "-" * 70,
                "ALL FINDINGS",
                "-" * 70,
            ])
            current_category = None
            rank = self._PRIORITY_ORDER
            ordered = sorted(
                result.findings,
                key=lambda f: (f.category, rank.get(f.priority, len(rank))),
            )
            for finding in ordered:
                if finding.category != current_category:
                    current_category = finding.category
                    lines.append(f"\n[{current_category}]")
                lines.append(f" [{finding.priority}] {finding.issue}")
                if finding.recommendation:
                    lines.append(f"{finding.recommendation}")

        lines.extend(["", "=" * 70])
        return "\n".join(lines)
def main():
    """CLI entry point: parse arguments, run the audit, emit results."""
    parser = argparse.ArgumentParser(
        description="Run comprehensive SEO audit",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run full audit and output to console
  python full_audit.py --url https://example.com

  # Export to Notion
  python full_audit.py --url https://example.com --output notion --notion-page-id abc123

  # Output as JSON
  python full_audit.py --url https://example.com --json
""",
    )
    parser.add_argument("--url", "-u", required=True, help="URL to audit")
    parser.add_argument("--output", "-o", choices=["console", "notion", "json"],
                        default="console", help="Output format")
    parser.add_argument("--notion-page-id", help="Notion parent page ID (required for notion output)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--no-robots", action="store_true", help="Skip robots.txt check")
    parser.add_argument("--no-sitemap", action="store_true", help="Skip sitemap validation")
    parser.add_argument("--no-schema", action="store_true", help="Skip schema validation")
    parser.add_argument("--no-performance", action="store_true", help="Skip PageSpeed analysis")
    args = parser.parse_args()

    auditor = SEOAuditor()

    # Run audit (each --no-* flag disables the corresponding stage)
    result = auditor.run_audit(
        args.url,
        include_robots=not args.no_robots,
        include_sitemap=not args.no_sitemap,
        include_schema=not args.no_schema,
        include_performance=not args.no_performance,
    )

    # Output results
    if args.json or args.output == "json":
        # default=str covers datetime and any other non-JSON-native values
        print(json.dumps(result.to_dict(), indent=2, default=str))
    elif args.output == "notion":
        if not args.notion_page_id:
            parser.error("--notion-page-id required for notion output")
        notion_result = auditor.export_to_notion(result, args.notion_page_id)
        print("Exported to Notion:")
        print(f" Database ID: {notion_result['database_id']}")
        # BUG FIX: export_to_notion's default-database path returns no
        # 'summary_page_id', so indexing it crashed with KeyError on the
        # very path this CLI exercises — guard with .get() instead.
        summary_page_id = notion_result.get("summary_page_id")
        if summary_page_id:
            print(f" Summary Page: {summary_page_id}")
        print(f" Findings Created: {notion_result['findings_created']}")
    else:
        print(auditor.generate_report(result))


if __name__ == "__main__":
    main()