""" Full SEO Audit - Orchestration Script ===================================== Purpose: Run comprehensive SEO audit combining all tools Python: 3.10+ Usage: python full_audit.py --url https://example.com --output notion --notion-page-id abc123 """ import argparse import json import logging from dataclasses import dataclass, field from datetime import datetime from typing import Any from urllib.parse import urlparse from robots_checker import RobotsChecker from schema_validator import SchemaValidator from sitemap_validator import SitemapValidator from pagespeed_client import PageSpeedClient from notion_reporter import NotionReporter, SEOFinding logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", ) logger = logging.getLogger(__name__) @dataclass class AuditResult: """Complete SEO audit result.""" url: str timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) robots: dict = field(default_factory=dict) sitemap: dict = field(default_factory=dict) schema: dict = field(default_factory=dict) performance: dict = field(default_factory=dict) findings: list[SEOFinding] = field(default_factory=list) summary: dict = field(default_factory=dict) def to_dict(self) -> dict: return { "url": self.url, "timestamp": self.timestamp, "robots": self.robots, "sitemap": self.sitemap, "schema": self.schema, "performance": self.performance, "summary": self.summary, "findings_count": len(self.findings), } class SEOAuditor: """Orchestrate comprehensive SEO audit.""" def __init__(self): self.robots_checker = RobotsChecker() self.sitemap_validator = SitemapValidator() self.schema_validator = SchemaValidator() self.pagespeed_client = PageSpeedClient() def run_audit( self, url: str, include_robots: bool = True, include_sitemap: bool = True, include_schema: bool = True, include_performance: bool = True, ) -> AuditResult: """ Run comprehensive SEO audit. Args: url: URL to audit include_robots: Check robots.txt include_sitemap: Validate sitemap include_schema: Validate schema markup include_performance: Run PageSpeed analysis """ result = AuditResult(url=url) parsed_url = urlparse(url) base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" logger.info(f"Starting SEO audit for {url}") # 1. Robots.txt analysis if include_robots: logger.info("Analyzing robots.txt...") try: robots_result = self.robots_checker.analyze(base_url) result.robots = robots_result.to_dict() self._process_robots_findings(robots_result, result) except Exception as e: logger.error(f"Robots.txt analysis failed: {e}") result.robots = {"error": str(e)} # 2. Sitemap validation if include_sitemap: logger.info("Validating sitemap...") sitemap_url = f"{base_url}/sitemap.xml" # Try to get sitemap URL from robots.txt if result.robots.get("sitemaps"): sitemap_url = result.robots["sitemaps"][0] try: sitemap_result = self.sitemap_validator.validate(sitemap_url) result.sitemap = sitemap_result.to_dict() self._process_sitemap_findings(sitemap_result, result) except Exception as e: logger.error(f"Sitemap validation failed: {e}") result.sitemap = {"error": str(e)} # 3. Schema validation if include_schema: logger.info("Validating schema markup...") try: schema_result = self.schema_validator.validate(url=url) result.schema = schema_result.to_dict() self._process_schema_findings(schema_result, result) except Exception as e: logger.error(f"Schema validation failed: {e}") result.schema = {"error": str(e)} # 4. PageSpeed analysis if include_performance: logger.info("Running PageSpeed analysis...") try: perf_result = self.pagespeed_client.analyze(url, strategy="mobile") result.performance = perf_result.to_dict() self._process_performance_findings(perf_result, result) except Exception as e: logger.error(f"PageSpeed analysis failed: {e}") result.performance = {"error": str(e)} # Generate summary result.summary = self._generate_summary(result) logger.info(f"Audit complete. Found {len(result.findings)} issues.") return result def _process_robots_findings(self, robots_result, audit_result: AuditResult): """Convert robots.txt issues to findings.""" for issue in robots_result.issues: priority = "Medium" if issue.severity == "error": priority = "Critical" elif issue.severity == "warning": priority = "High" audit_result.findings.append(SEOFinding( issue=issue.message, category="Robots.txt", priority=priority, description=issue.directive or "", recommendation=issue.suggestion or "", )) def _process_sitemap_findings(self, sitemap_result, audit_result: AuditResult): """Convert sitemap issues to findings.""" for issue in sitemap_result.issues: priority = "Medium" if issue.severity == "error": priority = "High" elif issue.severity == "warning": priority = "Medium" audit_result.findings.append(SEOFinding( issue=issue.message, category="Sitemap", priority=priority, url=issue.url, recommendation=issue.suggestion or "", )) def _process_schema_findings(self, schema_result, audit_result: AuditResult): """Convert schema issues to findings.""" for issue in schema_result.issues: priority = "Low" if issue.severity == "error": priority = "High" elif issue.severity == "warning": priority = "Medium" audit_result.findings.append(SEOFinding( issue=issue.message, category="Schema/Structured Data", priority=priority, description=f"Schema type: {issue.schema_type}" if issue.schema_type else "", recommendation=issue.suggestion or "", )) def _process_performance_findings(self, perf_result, audit_result: AuditResult): """Convert performance issues to findings.""" cwv = perf_result.core_web_vitals # Check Core Web Vitals if cwv.lcp_rating == "POOR": audit_result.findings.append(SEOFinding( issue=f"Poor LCP: {cwv.lcp / 1000:.2f}s (should be < 2.5s)", category="Performance", priority="Critical", impact="Users experience slow page loads, affecting bounce rate and rankings", recommendation="Optimize images, reduce server response time, use CDN", )) elif cwv.lcp_rating == "NEEDS_IMPROVEMENT": audit_result.findings.append(SEOFinding( issue=f"LCP needs improvement: {cwv.lcp / 1000:.2f}s (target < 2.5s)", category="Performance", priority="High", recommendation="Optimize largest content element loading", )) if cwv.cls_rating == "POOR": audit_result.findings.append(SEOFinding( issue=f"Poor CLS: {cwv.cls:.3f} (should be < 0.1)", category="Performance", priority="High", impact="Layout shifts frustrate users", recommendation="Set dimensions for images/embeds, avoid inserting content above existing content", )) if cwv.fid_rating == "POOR": audit_result.findings.append(SEOFinding( issue=f"Poor FID/TBT: {cwv.fid:.0f}ms (should be < 100ms)", category="Performance", priority="High", impact="Slow interactivity affects user experience", recommendation="Reduce JavaScript execution time, break up long tasks", )) # Check performance score if perf_result.performance_score and perf_result.performance_score < 50: audit_result.findings.append(SEOFinding( issue=f"Low performance score: {perf_result.performance_score:.0f}/100", category="Performance", priority="High", impact="Poor performance affects user experience and SEO", recommendation="Address top opportunities from PageSpeed Insights", )) # Add top opportunities as findings for opp in perf_result.opportunities[:3]: if opp["savings_ms"] > 500: # Only significant savings audit_result.findings.append(SEOFinding( issue=opp["title"], category="Performance", priority="Medium", description=opp.get("description", ""), impact=f"Potential savings: {opp['savings_ms'] / 1000:.1f}s", recommendation="See PageSpeed Insights for details", )) def _generate_summary(self, result: AuditResult) -> dict: """Generate audit summary.""" findings_by_priority = {} findings_by_category = {} for finding in result.findings: # Count by priority findings_by_priority[finding.priority] = ( findings_by_priority.get(finding.priority, 0) + 1 ) # Count by category findings_by_category[finding.category] = ( findings_by_category.get(finding.category, 0) + 1 ) return { "total_findings": len(result.findings), "findings_by_priority": findings_by_priority, "findings_by_category": findings_by_category, "robots_accessible": result.robots.get("accessible", False), "sitemap_valid": result.sitemap.get("valid", False), "schema_valid": result.schema.get("valid", False), "performance_score": result.performance.get("scores", {}).get("performance"), "quick_wins": [ f.issue for f in result.findings if f.priority in ("Medium", "Low") ][:5], "critical_issues": [ f.issue for f in result.findings if f.priority == "Critical" ], } def export_to_notion( self, result: AuditResult, parent_page_id: str | None = None, use_default_db: bool = True, ) -> dict: """ Export audit results to Notion. Args: result: AuditResult object parent_page_id: Parent page ID (for creating new database) use_default_db: If True, use OurDigital SEO Audit Log database Returns: Dict with database_id, summary_page_id, findings_created """ reporter = NotionReporter() audit_id = f"{urlparse(result.url).netloc}-{datetime.now().strftime('%Y%m%d-%H%M%S')}" # Add site and audit_id to all findings for finding in result.findings: finding.site = result.url finding.audit_id = audit_id if use_default_db: # Use the default OurDigital SEO Audit Log database page_ids = reporter.add_findings_batch(result.findings) return { "database_id": reporter.DEFAULT_DATABASE_ID if hasattr(reporter, 'DEFAULT_DATABASE_ID') else "2c8581e5-8a1e-8035-880b-e38cefc2f3ef", "audit_id": audit_id, "findings_created": len(page_ids), } else: # Create new database under parent page if not parent_page_id: raise ValueError("parent_page_id required when not using default database") db_title = f"SEO Audit - {urlparse(result.url).netloc} - {datetime.now().strftime('%Y-%m-%d')}" database_id = reporter.create_findings_database(parent_page_id, db_title) page_ids = reporter.add_findings_batch(result.findings, database_id) # Create summary page summary_page_id = reporter.create_audit_summary_page( parent_page_id, result.url, result.summary, ) return { "database_id": database_id, "summary_page_id": summary_page_id, "audit_id": audit_id, "findings_created": len(page_ids), } def generate_report(self, result: AuditResult) -> str: """Generate human-readable report.""" lines = [ "=" * 70, "SEO AUDIT REPORT", "=" * 70, f"URL: {result.url}", f"Date: {result.timestamp}", "", "-" * 70, "SUMMARY", "-" * 70, f"Total Issues Found: {result.summary.get('total_findings', 0)}", "", ] # Priority breakdown lines.append("Issues by Priority:") for priority in ["Critical", "High", "Medium", "Low"]: count = result.summary.get("findings_by_priority", {}).get(priority, 0) if count: lines.append(f" {priority}: {count}") lines.append("") # Category breakdown lines.append("Issues by Category:") for category, count in result.summary.get("findings_by_category", {}).items(): lines.append(f" {category}: {count}") lines.append("") lines.append("-" * 70) lines.append("STATUS OVERVIEW") lines.append("-" * 70) # Status checks lines.append(f"Robots.txt: {'✓ Accessible' if result.robots.get('accessible') else '✗ Not accessible'}") lines.append(f"Sitemap: {'✓ Valid' if result.sitemap.get('valid') else '✗ Issues found'}") lines.append(f"Schema: {'✓ Valid' if result.schema.get('valid') else '✗ Issues found'}") perf_score = result.performance.get("scores", {}).get("performance") if perf_score: status = "✓ Good" if perf_score >= 90 else "⚠ Needs work" if perf_score >= 50 else "✗ Poor" lines.append(f"Performance: {status} ({perf_score:.0f}/100)") # Critical issues critical = result.summary.get("critical_issues", []) if critical: lines.extend([ "", "-" * 70, "CRITICAL ISSUES (Fix Immediately)", "-" * 70, ]) for issue in critical: lines.append(f" • {issue}") # Quick wins quick_wins = result.summary.get("quick_wins", []) if quick_wins: lines.extend([ "", "-" * 70, "QUICK WINS", "-" * 70, ]) for issue in quick_wins[:5]: lines.append(f" • {issue}") # All findings if result.findings: lines.extend([ "", "-" * 70, "ALL FINDINGS", "-" * 70, ]) current_category = None for finding in sorted(result.findings, key=lambda x: (x.category, x.priority)): if finding.category != current_category: current_category = finding.category lines.append(f"\n[{current_category}]") lines.append(f" [{finding.priority}] {finding.issue}") if finding.recommendation: lines.append(f" → {finding.recommendation}") lines.extend(["", "=" * 70]) return "\n".join(lines) def main(): """CLI entry point.""" parser = argparse.ArgumentParser( description="Run comprehensive SEO audit", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Run full audit and output to console python full_audit.py --url https://example.com # Export to Notion python full_audit.py --url https://example.com --output notion --notion-page-id abc123 # Output as JSON python full_audit.py --url https://example.com --json """, ) parser.add_argument("--url", "-u", required=True, help="URL to audit") parser.add_argument("--output", "-o", choices=["console", "notion", "json"], default="console", help="Output format") parser.add_argument("--notion-page-id", help="Notion parent page ID (required for notion output)") parser.add_argument("--json", action="store_true", help="Output as JSON") parser.add_argument("--no-robots", action="store_true", help="Skip robots.txt check") parser.add_argument("--no-sitemap", action="store_true", help="Skip sitemap validation") parser.add_argument("--no-schema", action="store_true", help="Skip schema validation") parser.add_argument("--no-performance", action="store_true", help="Skip PageSpeed analysis") args = parser.parse_args() auditor = SEOAuditor() # Run audit result = auditor.run_audit( args.url, include_robots=not args.no_robots, include_sitemap=not args.no_sitemap, include_schema=not args.no_schema, include_performance=not args.no_performance, ) # Output results if args.json or args.output == "json": print(json.dumps(result.to_dict(), indent=2, default=str)) elif args.output == "notion": if not args.notion_page_id: parser.error("--notion-page-id required for notion output") notion_result = auditor.export_to_notion(result, args.notion_page_id) print(f"Exported to Notion:") print(f" Database ID: {notion_result['database_id']}") print(f" Summary Page: {notion_result['summary_page_id']}") print(f" Findings Created: {notion_result['findings_created']}") else: print(auditor.generate_report(result)) if __name__ == "__main__": main()