directory changes and restructuring
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
497
custom-skills/_archive/seo-audit-agent/scripts/full_audit.py
Normal file
497
custom-skills/_archive/seo-audit-agent/scripts/full_audit.py
Normal file
@@ -0,0 +1,497 @@
|
||||
"""
|
||||
Full SEO Audit - Orchestration Script
|
||||
=====================================
|
||||
Purpose: Run comprehensive SEO audit combining all tools
|
||||
Python: 3.10+
|
||||
Usage:
|
||||
python full_audit.py --url https://example.com --output notion --notion-page-id abc123
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from robots_checker import RobotsChecker
|
||||
from schema_validator import SchemaValidator
|
||||
from sitemap_validator import SitemapValidator
|
||||
from pagespeed_client import PageSpeedClient
|
||||
from notion_reporter import NotionReporter, SEOFinding
|
||||
|
||||
# Module-wide logging: timestamped, INFO-level messages for audit progress.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class AuditResult:
    """Complete SEO audit result.

    Aggregates the per-tool result payloads plus the derived findings and
    summary produced by SEOAuditor.
    """

    url: str
    # ISO-8601 timestamp captured when the result object is created.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    robots: dict = field(default_factory=dict)
    sitemap: dict = field(default_factory=dict)
    schema: dict = field(default_factory=dict)
    performance: dict = field(default_factory=dict)
    findings: list[SEOFinding] = field(default_factory=list)
    summary: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a JSON-serializable snapshot; findings are reduced to a count."""
        snapshot = {
            key: getattr(self, key)
            for key in (
                "url",
                "timestamp",
                "robots",
                "sitemap",
                "schema",
                "performance",
                "summary",
            )
        }
        snapshot["findings_count"] = len(self.findings)
        return snapshot
|
||||
|
||||
|
||||
class SEOAuditor:
    """Orchestrate comprehensive SEO audit.

    Combines robots.txt analysis, sitemap validation, schema markup
    validation, and PageSpeed performance analysis into a single
    AuditResult, which can then be rendered as a text report or exported
    to Notion.
    """

    # Severity rank used when ordering findings in the text report
    # (lower value = more urgent). A plain alphabetical sort would put
    # "Low" before "Medium".
    _PRIORITY_RANK = {"Critical": 0, "High": 1, "Medium": 2, "Low": 3}

    def __init__(self):
        self.robots_checker = RobotsChecker()
        self.sitemap_validator = SitemapValidator()
        self.schema_validator = SchemaValidator()
        self.pagespeed_client = PageSpeedClient()

    def run_audit(
        self,
        url: str,
        include_robots: bool = True,
        include_sitemap: bool = True,
        include_schema: bool = True,
        include_performance: bool = True,
    ) -> AuditResult:
        """
        Run comprehensive SEO audit.

        Each tool runs independently: a failure in one check is logged,
        recorded as an ``{"error": ...}`` payload in the matching result
        section, and does not abort the remaining checks.

        Args:
            url: URL to audit
            include_robots: Check robots.txt
            include_sitemap: Validate sitemap
            include_schema: Validate schema markup
            include_performance: Run PageSpeed analysis

        Returns:
            Populated AuditResult including findings and a summary.
        """
        result = AuditResult(url=url)
        parsed_url = urlparse(url)
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

        logger.info(f"Starting SEO audit for {url}")

        # 1. Robots.txt analysis
        if include_robots:
            logger.info("Analyzing robots.txt...")
            try:
                robots_result = self.robots_checker.analyze(base_url)
                result.robots = robots_result.to_dict()
                self._process_robots_findings(robots_result, result)
            except Exception as e:
                logger.error(f"Robots.txt analysis failed: {e}")
                result.robots = {"error": str(e)}

        # 2. Sitemap validation
        if include_sitemap:
            logger.info("Validating sitemap...")
            sitemap_url = f"{base_url}/sitemap.xml"
            # Prefer a sitemap URL declared in robots.txt over the default path.
            if result.robots.get("sitemaps"):
                sitemap_url = result.robots["sitemaps"][0]
            try:
                sitemap_result = self.sitemap_validator.validate(sitemap_url)
                result.sitemap = sitemap_result.to_dict()
                self._process_sitemap_findings(sitemap_result, result)
            except Exception as e:
                logger.error(f"Sitemap validation failed: {e}")
                result.sitemap = {"error": str(e)}

        # 3. Schema validation
        if include_schema:
            logger.info("Validating schema markup...")
            try:
                schema_result = self.schema_validator.validate(url=url)
                result.schema = schema_result.to_dict()
                self._process_schema_findings(schema_result, result)
            except Exception as e:
                logger.error(f"Schema validation failed: {e}")
                result.schema = {"error": str(e)}

        # 4. PageSpeed analysis
        if include_performance:
            logger.info("Running PageSpeed analysis...")
            try:
                perf_result = self.pagespeed_client.analyze(url, strategy="mobile")
                result.performance = perf_result.to_dict()
                self._process_performance_findings(perf_result, result)
            except Exception as e:
                logger.error(f"PageSpeed analysis failed: {e}")
                result.performance = {"error": str(e)}

        # Generate summary
        result.summary = self._generate_summary(result)

        logger.info(f"Audit complete. Found {len(result.findings)} issues.")
        return result

    @staticmethod
    def _map_severity(severity: str, *, error: str, warning: str, default: str) -> str:
        """Map a tool-reported severity string to a finding priority label.

        Centralizes the severity -> priority translation that was previously
        duplicated in each _process_* method; each tool supplies its own
        priority labels for the "error" / "warning" / other cases.
        """
        if severity == "error":
            return error
        if severity == "warning":
            return warning
        return default

    def _process_robots_findings(self, robots_result, audit_result: AuditResult):
        """Convert robots.txt issues to findings."""
        for issue in robots_result.issues:
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Robots.txt",
                # Robots problems can block crawling entirely, so errors
                # are treated as Critical.
                priority=self._map_severity(
                    issue.severity, error="Critical", warning="High", default="Medium"
                ),
                description=issue.directive or "",
                recommendation=issue.suggestion or "",
            ))

    def _process_sitemap_findings(self, sitemap_result, audit_result: AuditResult):
        """Convert sitemap issues to findings."""
        for issue in sitemap_result.issues:
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Sitemap",
                priority=self._map_severity(
                    issue.severity, error="High", warning="Medium", default="Medium"
                ),
                url=issue.url,
                recommendation=issue.suggestion or "",
            ))

    def _process_schema_findings(self, schema_result, audit_result: AuditResult):
        """Convert schema issues to findings."""
        for issue in schema_result.issues:
            audit_result.findings.append(SEOFinding(
                issue=issue.message,
                category="Schema/Structured Data",
                priority=self._map_severity(
                    issue.severity, error="High", warning="Medium", default="Low"
                ),
                description=f"Schema type: {issue.schema_type}" if issue.schema_type else "",
                recommendation=issue.suggestion or "",
            ))

    def _process_performance_findings(self, perf_result, audit_result: AuditResult):
        """Convert performance issues to findings.

        Emits findings for poor Core Web Vitals (LCP, CLS, FID/TBT), a low
        overall performance score, and the top PageSpeed opportunities with
        significant estimated savings.
        """
        cwv = perf_result.core_web_vitals

        # Check Core Web Vitals
        if cwv.lcp_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor LCP: {cwv.lcp / 1000:.2f}s (should be < 2.5s)",
                category="Performance",
                priority="Critical",
                impact="Users experience slow page loads, affecting bounce rate and rankings",
                recommendation="Optimize images, reduce server response time, use CDN",
            ))
        elif cwv.lcp_rating == "NEEDS_IMPROVEMENT":
            audit_result.findings.append(SEOFinding(
                issue=f"LCP needs improvement: {cwv.lcp / 1000:.2f}s (target < 2.5s)",
                category="Performance",
                priority="High",
                recommendation="Optimize largest content element loading",
            ))

        if cwv.cls_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor CLS: {cwv.cls:.3f} (should be < 0.1)",
                category="Performance",
                priority="High",
                impact="Layout shifts frustrate users",
                recommendation="Set dimensions for images/embeds, avoid inserting content above existing content",
            ))

        if cwv.fid_rating == "POOR":
            audit_result.findings.append(SEOFinding(
                issue=f"Poor FID/TBT: {cwv.fid:.0f}ms (should be < 100ms)",
                category="Performance",
                priority="High",
                impact="Slow interactivity affects user experience",
                recommendation="Reduce JavaScript execution time, break up long tasks",
            ))

        # Check performance score.
        # BUG FIX: previously `if perf_result.performance_score and ... < 50`,
        # which silently skipped a legitimate score of exactly 0.
        if perf_result.performance_score is not None and perf_result.performance_score < 50:
            audit_result.findings.append(SEOFinding(
                issue=f"Low performance score: {perf_result.performance_score:.0f}/100",
                category="Performance",
                priority="High",
                impact="Poor performance affects user experience and SEO",
                recommendation="Address top opportunities from PageSpeed Insights",
            ))

        # Add top opportunities as findings
        for opp in perf_result.opportunities[:3]:
            if opp["savings_ms"] > 500:  # Only significant savings
                audit_result.findings.append(SEOFinding(
                    issue=opp["title"],
                    category="Performance",
                    priority="Medium",
                    description=opp.get("description", ""),
                    impact=f"Potential savings: {opp['savings_ms'] / 1000:.1f}s",
                    recommendation="See PageSpeed Insights for details",
                ))

    def _generate_summary(self, result: AuditResult) -> dict:
        """Generate audit summary.

        Returns counts by priority/category, per-tool status flags, the
        first five Medium/Low "quick win" issues, and all Critical issues.
        """
        findings_by_priority = {}
        findings_by_category = {}

        for finding in result.findings:
            # Count by priority
            findings_by_priority[finding.priority] = (
                findings_by_priority.get(finding.priority, 0) + 1
            )
            # Count by category
            findings_by_category[finding.category] = (
                findings_by_category.get(finding.category, 0) + 1
            )

        return {
            "total_findings": len(result.findings),
            "findings_by_priority": findings_by_priority,
            "findings_by_category": findings_by_category,
            "robots_accessible": result.robots.get("accessible", False),
            "sitemap_valid": result.sitemap.get("valid", False),
            "schema_valid": result.schema.get("valid", False),
            "performance_score": result.performance.get("scores", {}).get("performance"),
            "quick_wins": [
                f.issue for f in result.findings
                if f.priority in ("Medium", "Low")
            ][:5],
            "critical_issues": [
                f.issue for f in result.findings
                if f.priority == "Critical"
            ],
        }

    def export_to_notion(
        self,
        result: AuditResult,
        parent_page_id: str | None = None,
        use_default_db: bool = True,
    ) -> dict:
        """
        Export audit results to Notion.

        Args:
            result: AuditResult object
            parent_page_id: Parent page ID (for creating new database)
            use_default_db: If True, use OurDigital SEO Audit Log database

        Returns:
            Dict with database_id, summary_page_id, audit_id, and
            findings_created. summary_page_id is None when the default
            database is used (no dedicated summary page is created there).

        Raises:
            ValueError: If use_default_db is False and no parent_page_id
                was supplied.
        """
        reporter = NotionReporter()
        audit_id = f"{urlparse(result.url).netloc}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"

        # Add site and audit_id to all findings
        for finding in result.findings:
            finding.site = result.url
            finding.audit_id = audit_id

        if use_default_db:
            # Use the default OurDigital SEO Audit Log database
            page_ids = reporter.add_findings_batch(result.findings)
            return {
                # Fall back to the known database ID for older reporters
                # that don't expose DEFAULT_DATABASE_ID.
                "database_id": getattr(
                    reporter,
                    "DEFAULT_DATABASE_ID",
                    "2c8581e5-8a1e-8035-880b-e38cefc2f3ef",
                ),
                # BUG FIX: this key was missing, so callers reading the
                # documented return shape crashed with KeyError.
                "summary_page_id": None,
                "audit_id": audit_id,
                "findings_created": len(page_ids),
            }
        else:
            # Create new database under parent page
            if not parent_page_id:
                raise ValueError("parent_page_id required when not using default database")

            db_title = f"SEO Audit - {urlparse(result.url).netloc} - {datetime.now().strftime('%Y-%m-%d')}"
            database_id = reporter.create_findings_database(parent_page_id, db_title)
            page_ids = reporter.add_findings_batch(result.findings, database_id)

            # Create summary page
            summary_page_id = reporter.create_audit_summary_page(
                parent_page_id,
                result.url,
                result.summary,
            )

            return {
                "database_id": database_id,
                "summary_page_id": summary_page_id,
                "audit_id": audit_id,
                "findings_created": len(page_ids),
            }

    def generate_report(self, result: AuditResult) -> str:
        """Generate human-readable report."""
        lines = [
            "=" * 70,
            "SEO AUDIT REPORT",
            "=" * 70,
            f"URL: {result.url}",
            f"Date: {result.timestamp}",
            "",
            "-" * 70,
            "SUMMARY",
            "-" * 70,
            f"Total Issues Found: {result.summary.get('total_findings', 0)}",
            "",
        ]

        # Priority breakdown
        lines.append("Issues by Priority:")
        for priority in ["Critical", "High", "Medium", "Low"]:
            count = result.summary.get("findings_by_priority", {}).get(priority, 0)
            if count:
                lines.append(f"  {priority}: {count}")

        lines.append("")

        # Category breakdown
        lines.append("Issues by Category:")
        for category, count in result.summary.get("findings_by_category", {}).items():
            lines.append(f"  {category}: {count}")

        lines.append("")
        lines.append("-" * 70)
        lines.append("STATUS OVERVIEW")
        lines.append("-" * 70)

        # Status checks
        lines.append(f"Robots.txt: {'✓ Accessible' if result.robots.get('accessible') else '✗ Not accessible'}")
        lines.append(f"Sitemap: {'✓ Valid' if result.sitemap.get('valid') else '✗ Issues found'}")
        lines.append(f"Schema: {'✓ Valid' if result.schema.get('valid') else '✗ Issues found'}")

        perf_score = result.performance.get("scores", {}).get("performance")
        # BUG FIX: `if perf_score:` dropped the line for a legitimate score
        # of exactly 0; only a missing score should be skipped.
        if perf_score is not None:
            status = "✓ Good" if perf_score >= 90 else "⚠ Needs work" if perf_score >= 50 else "✗ Poor"
            lines.append(f"Performance: {status} ({perf_score:.0f}/100)")

        # Critical issues
        critical = result.summary.get("critical_issues", [])
        if critical:
            lines.extend([
                "",
                "-" * 70,
                "CRITICAL ISSUES (Fix Immediately)",
                "-" * 70,
            ])
            for issue in critical:
                lines.append(f"  • {issue}")

        # Quick wins
        quick_wins = result.summary.get("quick_wins", [])
        if quick_wins:
            lines.extend([
                "",
                "-" * 70,
                "QUICK WINS",
                "-" * 70,
            ])
            for issue in quick_wins[:5]:
                lines.append(f"  • {issue}")

        # All findings
        if result.findings:
            lines.extend([
                "",
                "-" * 70,
                "ALL FINDINGS",
                "-" * 70,
            ])

            current_category = None
            # BUG FIX: sorting by the raw priority string ordered findings
            # alphabetically (Critical, High, Low, Medium); rank by true
            # severity instead so Medium comes before Low.
            ordered = sorted(
                result.findings,
                key=lambda f: (
                    f.category,
                    self._PRIORITY_RANK.get(f.priority, len(self._PRIORITY_RANK)),
                ),
            )
            for finding in ordered:
                if finding.category != current_category:
                    current_category = finding.category
                    lines.append(f"\n[{current_category}]")

                lines.append(f"  [{finding.priority}] {finding.issue}")
                if finding.recommendation:
                    lines.append(f"    → {finding.recommendation}")

        lines.extend(["", "=" * 70])

        return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, run the audit, emit the results.

    Output modes:
        --json / --output json  -> JSON dump of the audit result
        --output notion         -> export findings to Notion (needs --notion-page-id)
        default                 -> human-readable console report
    """
    parser = argparse.ArgumentParser(
        description="Run comprehensive SEO audit",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run full audit and output to console
  python full_audit.py --url https://example.com

  # Export to Notion
  python full_audit.py --url https://example.com --output notion --notion-page-id abc123

  # Output as JSON
  python full_audit.py --url https://example.com --json
""",
    )

    parser.add_argument("--url", "-u", required=True, help="URL to audit")
    parser.add_argument("--output", "-o", choices=["console", "notion", "json"],
                        default="console", help="Output format")
    parser.add_argument("--notion-page-id", help="Notion parent page ID (required for notion output)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--no-robots", action="store_true", help="Skip robots.txt check")
    parser.add_argument("--no-sitemap", action="store_true", help="Skip sitemap validation")
    parser.add_argument("--no-schema", action="store_true", help="Skip schema validation")
    parser.add_argument("--no-performance", action="store_true", help="Skip PageSpeed analysis")

    args = parser.parse_args()

    auditor = SEOAuditor()

    # Run audit
    result = auditor.run_audit(
        args.url,
        include_robots=not args.no_robots,
        include_sitemap=not args.no_sitemap,
        include_schema=not args.no_schema,
        include_performance=not args.no_performance,
    )

    # Output results
    if args.json or args.output == "json":
        print(json.dumps(result.to_dict(), indent=2, default=str))

    elif args.output == "notion":
        if not args.notion_page_id:
            parser.error("--notion-page-id required for notion output")
        notion_result = auditor.export_to_notion(result, args.notion_page_id)
        print("Exported to Notion:")
        print(f"  Database ID: {notion_result['database_id']}")
        # BUG FIX: export_to_notion's default-database path returns no
        # "summary_page_id" key, so direct indexing crashed with KeyError.
        summary_page_id = notion_result.get("summary_page_id")
        if summary_page_id:
            print(f"  Summary Page: {summary_page_id}")
        print(f"  Findings Created: {notion_result['findings_created']}")

    else:
        print(auditor.generate_report(result))


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user