#!/usr/bin/env python3
"""
SEO Comprehensive Audit Orchestrator

Runs 6 sub-skill audits sequentially, merges results, and computes a
weighted health score (0-100).

Usage:
    python seo_audit_orchestrator.py --url https://example.com
    python seo_audit_orchestrator.py --url https://example.com --skip-local --skip-gsc
    python seo_audit_orchestrator.py --url https://example.com --json
"""

import argparse
import json
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from urllib.parse import urlparse

# Health score weights (must sum to 1.0)
WEIGHTS = {
    "technical": 0.20,
    "on_page": 0.20,
    "core_web_vitals": 0.25,
    "schema": 0.15,
    "local_seo": 0.10,
    "search_console": 0.10,
}

# (minimum score, grade, status) in descending order of threshold.
GRADE_THRESHOLDS = [
    (90, "A", "Excellent"),
    (80, "B+", "Good"),
    (70, "B", "Above Average"),
    (60, "C", "Needs Improvement"),
    (40, "D", "Poor"),
    (0, "F", "Critical"),
]


def get_repo_root():
    """Find the git repository root.

    Asks ``git rev-parse --show-toplevel`` first. If that command fails, or
    the ``git`` executable cannot be launched at all, walks up from this
    script's directory looking for a ``.git`` entry.

    Returns:
        Path of the repository root.

    Exits the process with status 1 (after printing to stderr) when no
    repository root can be found.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True,
            text=True,
            check=True,
        )
        return Path(result.stdout.strip())
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError raised when git is not installed;
        # in both cases fall through to the filesystem walk below.
        pass

    # Fallback: walk up from this script's location
    current = Path(__file__).resolve().parent
    while current != current.parent:
        if (current / ".git").exists():
            return current
        current = current.parent

    print("Error: Could not find git repository root.", file=sys.stderr)
    sys.exit(1)


def _failure(status, reason):
    """Build the placeholder result used when a sub-skill yields no data."""
    return {
        "status": status,
        "reason": reason,
        "score": None,
        "issues": [],
    }


def run_script(script_path, args, stage_name, timeout=120):
    """Run a sub-skill script and return parsed JSON output.

    The script is launched with the current interpreter, the given ``args``
    and a trailing ``--json`` flag.

    Args:
        script_path: Path to the sub-skill script.
        args: List of extra command-line arguments for the script.
        stage_name: Label used in the timeout message.
        timeout: Seconds to wait before giving up (default 120).

    Returns:
        The decoded JSON from the script's stdout on success. Otherwise a
        dict with ``status`` ("skipped", "error" or "timeout"), ``reason``,
        ``score`` (None) and ``issues`` (empty list).
    """
    if not script_path.exists():
        return _failure("skipped", f"Script not found: {script_path}")

    cmd = [sys.executable, str(script_path)] + args + ["--json"]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout
        )
        stdout = result.stdout.strip()
        if result.returncode == 0 and stdout:
            return json.loads(stdout)
        return _failure(
            "error",
            result.stderr.strip() or f"Exit code {result.returncode}",
        )
    except subprocess.TimeoutExpired:
        return _failure("timeout", f"{stage_name} timed out after {timeout}s")
    except json.JSONDecodeError as e:
        return _failure("error", f"Invalid JSON output: {e}")
    except Exception as e:
        # Deliberate best-effort boundary: one failing stage must not abort
        # the whole audit, so any other failure is reported as an error.
        return _failure("error", str(e))


def _clamp_score(value):
    """Return ``value`` clamped to 0-100, or None if it is not a real number."""
    # bool is a subclass of int but is never a meaningful score.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    return min(100, max(0, value))


def extract_score(result):
    """Extract a 0-100 score from a sub-skill result.

    Looks at the top-level ``score``, ``health_score`` and ``overall_score``
    keys in that order, then at ``core_web_vitals.score``. Non-numeric values
    are ignored rather than raising.

    Returns:
        The first numeric score found, clamped to the range 0-100, or None.
    """
    if not isinstance(result, dict):
        return None

    # Try common score fields
    for key in ("score", "health_score", "overall_score"):
        score = _clamp_score(result.get(key))
        if score is not None:
            return score

    # Try nested core_web_vitals score
    cwv = result.get("core_web_vitals")
    if isinstance(cwv, dict):
        return _clamp_score(cwv.get("score"))
    return None


def extract_issues(result):
    """Extract issues list from a sub-skill result.

    Returns:
        ``result["issues"]`` when ``result`` is a dict and that value is a
        list; otherwise an empty list.
    """
    if not isinstance(result, dict):
        return []
    issues = result.get("issues", [])
    return issues if isinstance(issues, list) else []


def compute_health_score(stages, skipped):
    """Compute weighted health score, redistributing unused stage weights.

    A stage contributes only if it is listed in ``WEIGHTS``, is not in
    ``skipped`` and has a non-None ``score``. The weights of the
    contributing stages are renormalised to sum to 1, so a skipped or
    unscored stage does not drag the result towards zero.

    Args:
        stages: Mapping of stage name to a dict containing a ``score`` key.
        skipped: Collection of stage names to leave out.

    Returns:
        The weighted average rounded to one decimal place, or 0 when no
        stage contributes.
    """
    scored = []
    for stage_name, weight in WEIGHTS.items():
        if stage_name in skipped:
            continue
        score = stages.get(stage_name, {}).get("score")
        if score is not None:
            scored.append((score, weight))

    total_weight = sum(weight for _, weight in scored)
    if total_weight == 0:
        return 0

    weighted_sum = sum(score * weight for score, weight in scored)
    return round(weighted_sum / total_weight, 1)


def get_grade(score):
    """Return grade and status for a given score.

    Returns:
        The ``(grade, status)`` pair of the first ``GRADE_THRESHOLDS`` entry
        whose threshold is <= ``score``; ``("F", "Critical")`` if none match.
    """
    for threshold, grade, status in GRADE_THRESHOLDS:
        if score >= threshold:
            return grade, status
    return "F", "Critical"


def get_priority(score):
    """Return Notion priority based on health score.

    Below 40 is "Critical", below 60 "High", below 80 "Medium", else "Low".
    """
    if score < 40:
        return "Critical"
    elif score < 60:
        return "High"
    elif score < 80:
        return "Medium"
    return "Low"


def _stage_entry(stage_name, result):
    """Build the stage record for a stage backed by a single script result."""
    return {
        "score": extract_score(result),
        "weight": WEIGHTS[stage_name],
        "issues": extract_issues(result),
        "raw": result,
    }


def _combine_scores(first, second):
    """Average two optional scores; fall back to whichever one is present."""
    if first is not None and second is not None:
        return round((first + second) / 2)
    return first if first is not None else second


def main():
    """Parse CLI arguments, run every audit stage and print the JSON report.

    Progress lines are printed to stdout unless ``--json`` is given; the
    final report is always printed as indented JSON.
    """
    parser = argparse.ArgumentParser(description="Comprehensive SEO Audit Orchestrator")
    parser.add_argument("--url", required=True, help="URL to audit")
    parser.add_argument("--skip-local", action="store_true", help="Skip Local SEO stage")
    parser.add_argument("--skip-gsc", action="store_true", help="Skip Search Console stage")
    parser.add_argument("--json", action="store_true", help="Output JSON only")
    parser.add_argument("--skills-dir", help="Path to custom-skills directory")
    args = parser.parse_args()

    def progress(message):
        if not args.json:
            print(message)

    url = args.url
    domain = urlparse(url).netloc or url
    # Read the clock once so the date and the ID can never disagree
    # when the run crosses midnight.
    now = datetime.now()
    audit_date = now.strftime("%Y-%m-%d")
    audit_id = f"COMP-{now.strftime('%Y%m%d')}-001"

    # Resolve skills directory
    if args.skills_dir:
        skills_dir = Path(args.skills_dir)
    else:
        skills_dir = get_repo_root() / "custom-skills"

    skipped = set()
    if args.skip_local:
        skipped.add("local_seo")
    if args.skip_gsc:
        skipped.add("search_console")

    stages = {}

    # Stage 1: Technical SEO
    progress("[1/6] Running Technical SEO audit...")
    robots_result = run_script(
        skills_dir / "12-seo-technical-audit/code/scripts/robots_checker.py",
        ["--url", url],
        "robots_checker",
    )
    sitemap_url = f"{url.rstrip('/')}/sitemap.xml"
    sitemap_result = run_script(
        skills_dir / "12-seo-technical-audit/code/scripts/sitemap_validator.py",
        ["--url", sitemap_url],
        "sitemap_validator",
    )
    stages["technical"] = {
        "score": _combine_scores(
            extract_score(robots_result), extract_score(sitemap_result)
        ),
        "weight": WEIGHTS["technical"],
        "issues": extract_issues(robots_result) + extract_issues(sitemap_result),
        "raw": {"robots": robots_result, "sitemap": sitemap_result},
    }

    # Stage 2: On-Page SEO
    progress("[2/6] Running On-Page SEO audit...")
    stages["on_page"] = _stage_entry(
        "on_page",
        run_script(
            skills_dir / "13-seo-on-page-audit/code/scripts/page_analyzer.py",
            ["--url", url],
            "page_analyzer",
        ),
    )

    # Stage 3: Core Web Vitals
    progress("[3/6] Running Core Web Vitals audit...")
    stages["core_web_vitals"] = _stage_entry(
        "core_web_vitals",
        run_script(
            skills_dir / "14-seo-core-web-vitals/code/scripts/pagespeed_client.py",
            ["--url", url],
            "pagespeed_client",
        ),
    )

    # Stage 4: Schema Validation
    progress("[4/6] Running Schema validation...")
    stages["schema"] = _stage_entry(
        "schema",
        run_script(
            skills_dir / "16-seo-schema-validator/code/scripts/schema_validator.py",
            ["--url", url],
            "schema_validator",
        ),
    )

    # Stage 5: Local SEO
    if "local_seo" not in skipped:
        progress("[5/6] Local SEO stage (prompt-driven, skipping in script mode)...")
        # No script backs this stage, so it is recorded without a score.
        stages["local_seo"] = {
            "score": None,
            "weight": WEIGHTS["local_seo"],
            "issues": [],
            "raw": {
                "status": "prompt_driven",
                "note": "Local SEO requires interactive analysis via 18-seo-local-audit",
            },
        }
    else:
        progress("[5/6] Skipping Local SEO...")

    # Stage 6: Search Console
    if "search_console" not in skipped:
        progress("[6/6] Running Search Console analysis...")
        stages["search_console"] = _stage_entry(
            "search_console",
            run_script(
                skills_dir / "15-seo-search-console/code/scripts/gsc_client.py",
                ["--url", url],
                "gsc_client",
            ),
        )
    else:
        progress("[6/6] Skipping Search Console...")

    # Compute health score
    health_score = compute_health_score(stages, skipped)
    grade, status = get_grade(health_score)
    priority = get_priority(health_score)

    # Collect critical issues
    critical_issues = [
        {**issue, "stage": stage_name}
        for stage_name, stage_data in stages.items()
        for issue in stage_data.get("issues", [])
        if isinstance(issue, dict) and issue.get("type") in ("error", "critical")
    ]

    # Build output ("raw" sub-skill payloads are intentionally left out)
    output = {
        "url": url,
        "domain": domain,
        "audit_date": audit_date,
        "audit_id": audit_id,
        "health_score": health_score,
        "grade": grade,
        "status": status,
        "priority": priority,
        # Sorted so the report is deterministic across runs.
        "skipped_stages": sorted(skipped),
        "stages": {
            k: {
                "score": v.get("score"),
                "weight": v.get("weight"),
                "issues": v.get("issues", []),
            }
            for k, v in stages.items()
        },
        "critical_issues": critical_issues,
        "notion": {
            "database_id": "2c8581e5-8a1e-8035-880b-e38cefc2f3ef",
            "title": f"종합 SEO 감사 보고서 - {domain} - {audit_date}",
            "category": "Comprehensive Audit",
            "priority": priority,
            "audit_id": audit_id,
        },
    }

    print(json.dumps(output, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()