Add ourdigital-seo-audit skill with: - Full site audit orchestrator (full_audit.py) - Google Search Console and PageSpeed API clients - Schema.org JSON-LD validation and generation - XML sitemap and robots.txt validation - Notion database integration for findings export - Core Web Vitals measurement and analysis - 7 schema templates (article, faq, product, etc.) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
498 lines
18 KiB
Python
498 lines
18 KiB
Python
"""
|
|
Full SEO Audit - Orchestration Script
|
|
=====================================
|
|
Purpose: Run comprehensive SEO audit combining all tools
|
|
Python: 3.10+
|
|
Usage:
|
|
python full_audit.py --url https://example.com --output notion --notion-page-id abc123
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from typing import Any
|
|
from urllib.parse import urlparse
|
|
|
|
from robots_checker import RobotsChecker
|
|
from schema_validator import SchemaValidator
|
|
from sitemap_validator import SitemapValidator
|
|
from pagespeed_client import PageSpeedClient
|
|
from notion_reporter import NotionReporter, SEOFinding
|
|
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
|
)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class AuditResult:
    """Aggregated outcome of a full SEO audit for a single URL.

    Holds the raw per-tool result dicts (robots, sitemap, schema,
    performance) plus the normalized findings list and a summary dict.
    """

    url: str
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    robots: dict = field(default_factory=dict)
    sitemap: dict = field(default_factory=dict)
    schema: dict = field(default_factory=dict)
    performance: dict = field(default_factory=dict)
    findings: list[SEOFinding] = field(default_factory=list)
    summary: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a plain dict; findings are reduced to a count."""
        serializable_keys = (
            "url",
            "timestamp",
            "robots",
            "sitemap",
            "schema",
            "performance",
            "summary",
        )
        payload = {key: getattr(self, key) for key in serializable_keys}
        payload["findings_count"] = len(self.findings)
        return payload
|
|
|
|
|
|
class SEOAuditor:
    """Orchestrate a comprehensive SEO audit across all individual tools.

    Combines robots.txt analysis, sitemap validation, schema markup
    validation, and PageSpeed/Core Web Vitals measurement into a single
    AuditResult with a normalized findings list and summary.
    """

    # Display/sort order for finding priorities (most severe first).
    # Used by generate_report so findings are ordered by actual severity
    # rather than by alphabetical priority string.
    _PRIORITY_RANK = {"Critical": 0, "High": 1, "Medium": 2, "Low": 3}

    def __init__(self):
        self.robots_checker = RobotsChecker()
        self.sitemap_validator = SitemapValidator()
        self.schema_validator = SchemaValidator()
        self.pagespeed_client = PageSpeedClient()

    def run_audit(
        self,
        url: str,
        include_robots: bool = True,
        include_sitemap: bool = True,
        include_schema: bool = True,
        include_performance: bool = True,
    ) -> AuditResult:
        """
        Run comprehensive SEO audit.

        Each tool is run independently; a failure in one tool is logged
        and recorded as ``{"error": ...}`` in the result rather than
        aborting the remaining checks.

        Args:
            url: URL to audit
            include_robots: Check robots.txt
            include_sitemap: Validate sitemap
            include_schema: Validate schema markup
            include_performance: Run PageSpeed analysis

        Returns:
            AuditResult with per-tool dicts, findings, and summary.
        """
        result = AuditResult(url=url)
        parsed_url = urlparse(url)
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

        logger.info(f"Starting SEO audit for {url}")

        # 1. Robots.txt analysis
        if include_robots:
            logger.info("Analyzing robots.txt...")
            try:
                robots_result = self.robots_checker.analyze(base_url)
                result.robots = robots_result.to_dict()
                self._process_robots_findings(robots_result, result)
            except Exception as e:
                logger.error(f"Robots.txt analysis failed: {e}")
                result.robots = {"error": str(e)}

        # 2. Sitemap validation
        if include_sitemap:
            logger.info("Validating sitemap...")
            sitemap_url = f"{base_url}/sitemap.xml"
            # Prefer the sitemap URL declared in robots.txt, if any.
            if result.robots.get("sitemaps"):
                sitemap_url = result.robots["sitemaps"][0]
            try:
                sitemap_result = self.sitemap_validator.validate(sitemap_url)
                result.sitemap = sitemap_result.to_dict()
                self._process_sitemap_findings(sitemap_result, result)
            except Exception as e:
                logger.error(f"Sitemap validation failed: {e}")
                result.sitemap = {"error": str(e)}

        # 3. Schema validation
        if include_schema:
            logger.info("Validating schema markup...")
            try:
                schema_result = self.schema_validator.validate(url=url)
                result.schema = schema_result.to_dict()
                self._process_schema_findings(schema_result, result)
            except Exception as e:
                logger.error(f"Schema validation failed: {e}")
                result.schema = {"error": str(e)}

        # 4. PageSpeed analysis (mobile strategy, the stricter baseline)
        if include_performance:
            logger.info("Running PageSpeed analysis...")
            try:
                perf_result = self.pagespeed_client.analyze(url, strategy="mobile")
                result.performance = perf_result.to_dict()
                self._process_performance_findings(perf_result, result)
            except Exception as e:
                logger.error(f"PageSpeed analysis failed: {e}")
                result.performance = {"error": str(e)}

        # Generate summary
        result.summary = self._generate_summary(result)

        logger.info(f"Audit complete. Found {len(result.findings)} issues.")
        return result

    @staticmethod
    def _priority_for(severity: str, *, error: str, warning: str, default: str) -> str:
        """Map a tool-reported issue severity to a finding priority label.

        Each tool weights severities differently (e.g. a robots.txt error
        is Critical, a schema error is only High), so callers supply the
        mapping per tool.
        """
        if severity == "error":
            return error
        if severity == "warning":
            return warning
        return default

    def _process_robots_findings(self, robots_result, audit_result: AuditResult):
        """Convert robots.txt issues to findings (errors are Critical)."""
        for issue in robots_result.issues:
            priority = self._priority_for(
                issue.severity, error="Critical", warning="High", default="Medium"
            )
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Robots.txt",
                priority=priority,
                description=issue.directive or "",
                recommendation=issue.suggestion or "",
            ))

    def _process_sitemap_findings(self, sitemap_result, audit_result: AuditResult):
        """Convert sitemap issues to findings (errors are High)."""
        for issue in sitemap_result.issues:
            priority = self._priority_for(
                issue.severity, error="High", warning="Medium", default="Medium"
            )
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Sitemap",
                priority=priority,
                url=issue.url,
                recommendation=issue.suggestion or "",
            ))

    def _process_schema_findings(self, schema_result, audit_result: AuditResult):
        """Convert schema issues to findings (non-error/warning are Low)."""
        for issue in schema_result.issues:
            priority = self._priority_for(
                issue.severity, error="High", warning="Medium", default="Low"
            )
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Schema/Structured Data",
                priority=priority,
                description=f"Schema type: {issue.schema_type}" if issue.schema_type else "",
                recommendation=issue.suggestion or "",
            ))

    def _process_performance_findings(self, perf_result, audit_result: AuditResult):
        """Convert performance issues to findings.

        Emits findings for Core Web Vitals rated POOR (and LCP rated
        NEEDS_IMPROVEMENT), a low overall performance score, and the top
        PageSpeed opportunities with significant potential savings.
        """
        cwv = perf_result.core_web_vitals

        # Check Core Web Vitals. LCP/FID values are reported in ms.
        if cwv.lcp_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor LCP: {cwv.lcp / 1000:.2f}s (should be < 2.5s)",
                category="Performance",
                priority="Critical",
                impact="Users experience slow page loads, affecting bounce rate and rankings",
                recommendation="Optimize images, reduce server response time, use CDN",
            ))
        elif cwv.lcp_rating == "NEEDS_IMPROVEMENT":
            audit_result.findings.append(SEOFinding(
                issue=f"LCP needs improvement: {cwv.lcp / 1000:.2f}s (target < 2.5s)",
                category="Performance",
                priority="High",
                recommendation="Optimize largest content element loading",
            ))

        if cwv.cls_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor CLS: {cwv.cls:.3f} (should be < 0.1)",
                category="Performance",
                priority="High",
                impact="Layout shifts frustrate users",
                recommendation="Set dimensions for images/embeds, avoid inserting content above existing content",
            ))

        if cwv.fid_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor FID/TBT: {cwv.fid:.0f}ms (should be < 100ms)",
                category="Performance",
                priority="High",
                impact="Slow interactivity affects user experience",
                recommendation="Reduce JavaScript execution time, break up long tasks",
            ))

        # Check overall Lighthouse performance score (0-100).
        if perf_result.performance_score and perf_result.performance_score < 50:
            audit_result.findings.append(SEOFinding(
                issue=f"Low performance score: {perf_result.performance_score:.0f}/100",
                category="Performance",
                priority="High",
                impact="Poor performance affects user experience and SEO",
                recommendation="Address top opportunities from PageSpeed Insights",
            ))

        # Add top opportunities as findings
        for opp in perf_result.opportunities[:3]:
            if opp["savings_ms"] > 500:  # Only significant savings
                audit_result.findings.append(SEOFinding(
                    issue=opp["title"],
                    category="Performance",
                    priority="Medium",
                    description=opp.get("description", ""),
                    impact=f"Potential savings: {opp['savings_ms'] / 1000:.1f}s",
                    recommendation="See PageSpeed Insights for details",
                ))

    def _generate_summary(self, result: AuditResult) -> dict:
        """Generate audit summary: counts by priority/category, per-tool
        status flags, up to 5 quick wins (Medium/Low), and all Critical
        issues."""
        findings_by_priority = {}
        findings_by_category = {}

        for finding in result.findings:
            # Count by priority
            findings_by_priority[finding.priority] = (
                findings_by_priority.get(finding.priority, 0) + 1
            )
            # Count by category
            findings_by_category[finding.category] = (
                findings_by_category.get(finding.category, 0) + 1
            )

        return {
            "total_findings": len(result.findings),
            "findings_by_priority": findings_by_priority,
            "findings_by_category": findings_by_category,
            "robots_accessible": result.robots.get("accessible", False),
            "sitemap_valid": result.sitemap.get("valid", False),
            "schema_valid": result.schema.get("valid", False),
            "performance_score": result.performance.get("scores", {}).get("performance"),
            "quick_wins": [
                f.issue for f in result.findings
                if f.priority in ("Medium", "Low")
            ][:5],
            "critical_issues": [
                f.issue for f in result.findings
                if f.priority == "Critical"
            ],
        }

    def export_to_notion(
        self,
        result: AuditResult,
        parent_page_id: str | None = None,
        use_default_db: bool = True,
    ) -> dict:
        """
        Export audit results to Notion.

        Args:
            result: AuditResult object
            parent_page_id: Parent page ID (for creating new database)
            use_default_db: If True, use OurDigital SEO Audit Log database

        Returns:
            Dict with audit_id and findings_created; plus database_id
            always, and summary_page_id only when a new database is
            created (use_default_db=False).

        Raises:
            ValueError: if use_default_db is False and parent_page_id is
                not provided.
        """
        reporter = NotionReporter()
        # Unique audit run id: host + timestamp, used to group findings.
        audit_id = f"{urlparse(result.url).netloc}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"

        # Add site and audit_id to all findings
        for finding in result.findings:
            finding.site = result.url
            finding.audit_id = audit_id

        if use_default_db:
            # Use the default OurDigital SEO Audit Log database
            page_ids = reporter.add_findings_batch(result.findings)
            return {
                # Fall back to the known database id for older reporter
                # versions that don't expose DEFAULT_DATABASE_ID.
                "database_id": reporter.DEFAULT_DATABASE_ID if hasattr(reporter, 'DEFAULT_DATABASE_ID') else "2c8581e5-8a1e-8035-880b-e38cefc2f3ef",
                "audit_id": audit_id,
                "findings_created": len(page_ids),
            }
        else:
            # Create new database under parent page
            if not parent_page_id:
                raise ValueError("parent_page_id required when not using default database")

            db_title = f"SEO Audit - {urlparse(result.url).netloc} - {datetime.now().strftime('%Y-%m-%d')}"
            database_id = reporter.create_findings_database(parent_page_id, db_title)
            page_ids = reporter.add_findings_batch(result.findings, database_id)

            # Create summary page
            summary_page_id = reporter.create_audit_summary_page(
                parent_page_id,
                result.url,
                result.summary,
            )

            return {
                "database_id": database_id,
                "summary_page_id": summary_page_id,
                "audit_id": audit_id,
                "findings_created": len(page_ids),
            }

    def generate_report(self, result: AuditResult) -> str:
        """Generate human-readable report from an audited result.

        Expects result.summary to be populated (run_audit does this).
        """
        lines = [
            "=" * 70,
            "SEO AUDIT REPORT",
            "=" * 70,
            f"URL: {result.url}",
            f"Date: {result.timestamp}",
            "",
            "-" * 70,
            "SUMMARY",
            "-" * 70,
            f"Total Issues Found: {result.summary.get('total_findings', 0)}",
            "",
        ]

        # Priority breakdown
        lines.append("Issues by Priority:")
        for priority in ["Critical", "High", "Medium", "Low"]:
            count = result.summary.get("findings_by_priority", {}).get(priority, 0)
            if count:
                lines.append(f"  {priority}: {count}")

        lines.append("")

        # Category breakdown
        lines.append("Issues by Category:")
        for category, count in result.summary.get("findings_by_category", {}).items():
            lines.append(f"  {category}: {count}")

        lines.append("")
        lines.append("-" * 70)
        lines.append("STATUS OVERVIEW")
        lines.append("-" * 70)

        # Status checks
        lines.append(f"Robots.txt: {'✓ Accessible' if result.robots.get('accessible') else '✗ Not accessible'}")
        lines.append(f"Sitemap: {'✓ Valid' if result.sitemap.get('valid') else '✗ Issues found'}")
        lines.append(f"Schema: {'✓ Valid' if result.schema.get('valid') else '✗ Issues found'}")

        perf_score = result.performance.get("scores", {}).get("performance")
        # "is not None" (not truthiness): a score of 0 is a valid, very
        # poor score and must still be reported.
        if perf_score is not None:
            status = "✓ Good" if perf_score >= 90 else "⚠ Needs work" if perf_score >= 50 else "✗ Poor"
            lines.append(f"Performance: {status} ({perf_score:.0f}/100)")

        # Critical issues
        critical = result.summary.get("critical_issues", [])
        if critical:
            lines.extend([
                "",
                "-" * 70,
                "CRITICAL ISSUES (Fix Immediately)",
                "-" * 70,
            ])
            for issue in critical:
                lines.append(f"  • {issue}")

        # Quick wins
        quick_wins = result.summary.get("quick_wins", [])
        if quick_wins:
            lines.extend([
                "",
                "-" * 70,
                "QUICK WINS",
                "-" * 70,
            ])
            for issue in quick_wins[:5]:
                lines.append(f"  • {issue}")

        # All findings
        if result.findings:
            lines.extend([
                "",
                "-" * 70,
                "ALL FINDINGS",
                "-" * 70,
            ])

            current_category = None
            # Sort by category, then by actual severity rank. Sorting by
            # the priority *string* would order alphabetically
            # (Critical, High, Low, Medium), putting Low before Medium.
            for finding in sorted(
                result.findings,
                key=lambda x: (x.category, self._PRIORITY_RANK.get(x.priority, len(self._PRIORITY_RANK))),
            ):
                if finding.category != current_category:
                    current_category = finding.category
                    lines.append(f"\n[{current_category}]")

                lines.append(f"  [{finding.priority}] {finding.issue}")
                if finding.recommendation:
                    lines.append(f"    → {finding.recommendation}")

        lines.extend(["", "=" * 70])

        return "\n".join(lines)
|
|
|
|
|
|
def main():
    """CLI entry point: parse args, run the audit, emit the chosen output."""
    parser = argparse.ArgumentParser(
        description="Run comprehensive SEO audit",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run full audit and output to console
  python full_audit.py --url https://example.com

  # Export to Notion
  python full_audit.py --url https://example.com --output notion --notion-page-id abc123

  # Output as JSON
  python full_audit.py --url https://example.com --json
""",
    )

    parser.add_argument("--url", "-u", required=True, help="URL to audit")
    parser.add_argument("--output", "-o", choices=["console", "notion", "json"],
                        default="console", help="Output format")
    parser.add_argument("--notion-page-id", help="Notion parent page ID (required for notion output)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--no-robots", action="store_true", help="Skip robots.txt check")
    parser.add_argument("--no-sitemap", action="store_true", help="Skip sitemap validation")
    parser.add_argument("--no-schema", action="store_true", help="Skip schema validation")
    parser.add_argument("--no-performance", action="store_true", help="Skip PageSpeed analysis")

    args = parser.parse_args()

    auditor = SEOAuditor()

    # Run audit
    result = auditor.run_audit(
        args.url,
        include_robots=not args.no_robots,
        include_sitemap=not args.no_sitemap,
        include_schema=not args.no_schema,
        include_performance=not args.no_performance,
    )

    # Output results
    if args.json or args.output == "json":
        print(json.dumps(result.to_dict(), indent=2, default=str))

    elif args.output == "notion":
        if not args.notion_page_id:
            parser.error("--notion-page-id required for notion output")
        notion_result = auditor.export_to_notion(result, args.notion_page_id)
        print("Exported to Notion:")
        print(f"  Database ID: {notion_result['database_id']}")
        # summary_page_id is only present when a new database was created
        # (export_to_notion defaults to the shared audit-log database, which
        # returns no summary page) — printing it unconditionally raised
        # KeyError.
        if "summary_page_id" in notion_result:
            print(f"  Summary Page: {notion_result['summary_page_id']}")
        print(f"  Findings Created: {notion_result['findings_created']}")

    else:
        print(auditor.generate_report(result))
|