New skill that runs a 6-stage SEO audit pipeline (Technical, On-Page, Core Web Vitals, Schema, Local SEO, Search Console) and produces a unified health score (0-100) with weighted categories. Includes Python orchestrator script, slash command, and Notion integration for Korean audit reports. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
349 lines · 11 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
SEO Comprehensive Audit Orchestrator
|
|
|
|
Runs 6 sub-skill audits sequentially, merges results, and computes a weighted
|
|
health score (0-100).
|
|
|
|
Usage:
|
|
python seo_audit_orchestrator.py --url https://example.com
|
|
python seo_audit_orchestrator.py --url https://example.com --skip-local --skip-gsc
|
|
python seo_audit_orchestrator.py --url https://example.com --json
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
# Health score weights per audit stage (must sum to 1.0).
# Core Web Vitals carries the largest share; Local SEO and Search Console
# the smallest — those two stages can be skipped via CLI flags.
WEIGHTS = {
    "technical": 0.20,
    "on_page": 0.20,
    "core_web_vitals": 0.25,
    "schema": 0.15,
    "local_seo": 0.10,
    "search_console": 0.10,
}

# (minimum score, letter grade, status label), ordered descending.
# get_grade() returns the first entry whose threshold the score meets.
GRADE_THRESHOLDS = [
    (90, "A", "Excellent"),
    (80, "B+", "Good"),
    (70, "B", "Above Average"),
    (60, "C", "Needs Improvement"),
    (40, "D", "Poor"),
    (0, "F", "Critical"),
]
|
|
|
|
|
|
def get_repo_root():
    """Locate the root directory of the enclosing git repository.

    Tries ``git rev-parse --show-toplevel`` first. If git is not installed
    (FileNotFoundError) or the command fails (CalledProcessError), falls
    back to walking up from this script's directory looking for a ``.git``
    entry.

    Returns:
        Path: absolute path of the repository root.

    Exits:
        Calls sys.exit(1) after printing to stderr when no root is found.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True
        )
        return Path(result.stdout.strip())
    # FileNotFoundError: the git binary itself is missing from PATH —
    # the original only caught CalledProcessError, so a missing git
    # crashed instead of using the fallback walk below.
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Fallback: walk up from this script's location until we hit
        # a directory containing .git, or the filesystem root.
        current = Path(__file__).resolve().parent
        while current != current.parent:
            if (current / ".git").exists():
                return current
            current = current.parent
        print("Error: Could not find git repository root.", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
|
def run_script(script_path, args, stage_name, timeout=120):
    """Run a sub-skill script and return its parsed JSON output.

    Invokes ``script_path`` with the current interpreter, the given args,
    and a trailing ``--json`` flag, then parses stdout as JSON.

    Args:
        script_path (Path): sub-skill script to execute.
        args (list[str]): extra command-line arguments for the script.
        stage_name (str): human-readable stage label for error messages.
        timeout (int): seconds before the subprocess is killed
            (generalized from the previous hard-coded 120; default unchanged).

    Returns:
        dict: the script's JSON output on success, otherwise a uniform
        error dict with ``status``/``reason``/``score``/``issues`` keys
        so callers never have to special-case failures.
    """
    if not script_path.exists():
        return {
            "status": "skipped",
            "reason": f"Script not found: {script_path}",
            "score": None,
            "issues": [],
        }

    cmd = [sys.executable, str(script_path)] + args + ["--json"]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout
        )
        if result.returncode == 0 and result.stdout.strip():
            return json.loads(result.stdout.strip())
        else:
            return {
                "status": "error",
                "reason": result.stderr.strip() or f"Exit code {result.returncode}",
                "score": None,
                "issues": [],
            }
    except subprocess.TimeoutExpired:
        return {
            "status": "timeout",
            "reason": f"{stage_name} timed out after {timeout}s",
            "score": None,
            "issues": [],
        }
    except json.JSONDecodeError as e:
        return {
            "status": "error",
            "reason": f"Invalid JSON output: {e}",
            "score": None,
            "issues": [],
        }
    # Last-resort guard: a sub-skill failure must never abort the whole
    # pipeline; report it as a stage error instead.
    except Exception as e:
        return {
            "status": "error",
            "reason": str(e),
            "score": None,
            "issues": [],
        }
|
|
|
|
|
|
def extract_score(result):
    """Extract a 0-100 numeric score from a sub-skill result.

    Checks the common top-level keys (``score``, ``health_score``,
    ``overall_score``) in order, then a nested ``core_web_vitals.score``.
    Values are clamped to the 0-100 range.

    Args:
        result: sub-skill output; anything other than a dict yields None.

    Returns:
        int | float | None: clamped score, or None when no numeric score
        is present.
    """
    if not isinstance(result, dict):
        return None
    for key in ("score", "health_score", "overall_score"):
        value = result.get(key)
        # bool is a subclass of int — without this guard {"score": True}
        # would be clamped and returned as a bogus score of 1.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return min(100, max(0, value))
    # Nested Core Web Vitals score; type-checked so a non-numeric value
    # (e.g. "n/a") yields None instead of a TypeError from min/max.
    cwv = result.get("core_web_vitals")
    if isinstance(cwv, dict):
        value = cwv.get("score")
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return min(100, max(0, value))
    return None
|
|
|
|
|
|
def extract_issues(result):
    """Return the ``issues`` list from a sub-skill result.

    Yields an empty list when the result is not a dict, has no ``issues``
    key, or stores something other than a list under it.
    """
    if not isinstance(result, dict):
        return []
    issues = result.get("issues", [])
    return issues if isinstance(issues, list) else []
|
|
|
|
|
|
def compute_health_score(stages, skipped):
    """Compute the weighted 0-100 health score.

    Weights of stages that were skipped OR that produced no score
    (missing script, timeout, error, prompt-driven stage) are
    redistributed proportionally across the stages that actually scored.
    Previously an unscored stage kept its weight in the denominator and
    so counted as a silent zero, letting an orchestration failure drag
    down the site's score.

    Args:
        stages (dict): per-stage dicts containing at least a ``score`` key.
        skipped (set): stage names explicitly skipped via CLI flags.

    Returns:
        float | int: weighted score rounded to one decimal, or 0 when no
        stage produced a score.
    """
    # Only stages that are active AND actually produced a score share
    # the weight pool.
    scored_weights = {
        name: weight
        for name, weight in WEIGHTS.items()
        if name not in skipped and stages.get(name, {}).get("score") is not None
    }
    total_weight = sum(scored_weights.values())
    if total_weight == 0:
        return 0

    weighted_sum = sum(
        stages[name]["score"] * (weight / total_weight)
        for name, weight in scored_weights.items()
    )
    return round(weighted_sum, 1)
|
|
|
|
|
|
def get_grade(score):
    """Map a numeric score to its (grade, status) pair.

    Scans GRADE_THRESHOLDS (descending) and returns the first entry whose
    threshold the score meets; defensively falls back to ("F", "Critical").
    """
    return next(
        ((grade, status) for threshold, grade, status in GRADE_THRESHOLDS
         if score >= threshold),
        ("F", "Critical"),
    )
|
|
|
|
|
|
def get_priority(score):
    """Return the Notion priority label for a health score.

    Bands: <40 Critical, <60 High, <80 Medium, otherwise Low.
    """
    bands = ((40, "Critical"), (60, "High"), (80, "Medium"))
    for upper_bound, label in bands:
        if score < upper_bound:
            return label
    return "Low"
|
|
|
|
|
|
def main():
    """CLI entry point: run the 6-stage SEO audit pipeline and print a JSON report.

    Stages 1-4 (technical, on-page, Core Web Vitals, schema) shell out to
    sub-skill scripts under the skills directory; stage 5 (Local SEO) is
    prompt-driven and is only recorded as a placeholder here; stage 6
    (Search Console) shells out as well.  Progress messages print to
    stdout unless --json is given; the final JSON report always prints.
    """
    parser = argparse.ArgumentParser(description="Comprehensive SEO Audit Orchestrator")
    parser.add_argument("--url", required=True, help="URL to audit")
    parser.add_argument("--skip-local", action="store_true", help="Skip Local SEO stage")
    parser.add_argument("--skip-gsc", action="store_true", help="Skip Search Console stage")
    parser.add_argument("--json", action="store_true", help="Output JSON only")
    parser.add_argument("--skills-dir", help="Path to custom-skills directory")
    args = parser.parse_args()

    url = args.url
    # Fall back to the raw input when the URL has no scheme (netloc is "").
    domain = urlparse(url).netloc or url
    audit_date = datetime.now().strftime("%Y-%m-%d")
    # NOTE(review): the "-001" sequence suffix is hard-coded, so two audits
    # run on the same day share an ID — confirm whether that is acceptable.
    audit_id = f"COMP-{datetime.now().strftime('%Y%m%d')}-001"

    # Resolve skills directory: explicit flag wins, else <repo root>/custom-skills.
    if args.skills_dir:
        skills_dir = Path(args.skills_dir)
    else:
        repo_root = get_repo_root()
        skills_dir = repo_root / "custom-skills"

    # Stage names the user asked to skip; their weights are redistributed
    # inside compute_health_score().
    skipped = set()
    if args.skip_local:
        skipped.add("local_seo")
    if args.skip_gsc:
        skipped.add("search_console")

    stages = {}

    # Stage 1: Technical SEO — two scripts (robots + sitemap), averaged below.
    if not args.json:
        print("[1/6] Running Technical SEO audit...")
    robots_result = run_script(
        skills_dir / "12-seo-technical-audit/code/scripts/robots_checker.py",
        ["--url", url], "robots_checker"
    )
    # Sitemap assumed at the conventional root path — TODO confirm this
    # matches the target site (no robots.txt Sitemap: lookup is done here).
    sitemap_url = f"{url.rstrip('/')}/sitemap.xml"
    sitemap_result = run_script(
        skills_dir / "12-seo-technical-audit/code/scripts/sitemap_validator.py",
        ["--url", sitemap_url], "sitemap_validator"
    )
    tech_score = extract_score(robots_result)
    sitemap_score = extract_score(sitemap_result)
    # Average when both scripts scored; otherwise use whichever is available.
    if tech_score is not None and sitemap_score is not None:
        combined_tech_score = round((tech_score + sitemap_score) / 2)
    elif tech_score is not None:
        combined_tech_score = tech_score
    elif sitemap_score is not None:
        combined_tech_score = sitemap_score
    else:
        combined_tech_score = None
    stages["technical"] = {
        "score": combined_tech_score,
        "weight": WEIGHTS["technical"],
        "issues": extract_issues(robots_result) + extract_issues(sitemap_result),
        "raw": {"robots": robots_result, "sitemap": sitemap_result},
    }

    # Stage 2: On-Page SEO
    if not args.json:
        print("[2/6] Running On-Page SEO audit...")
    on_page_result = run_script(
        skills_dir / "13-seo-on-page-audit/code/scripts/page_analyzer.py",
        ["--url", url], "page_analyzer"
    )
    stages["on_page"] = {
        "score": extract_score(on_page_result),
        "weight": WEIGHTS["on_page"],
        "issues": extract_issues(on_page_result),
        "raw": on_page_result,
    }

    # Stage 3: Core Web Vitals
    if not args.json:
        print("[3/6] Running Core Web Vitals audit...")
    cwv_result = run_script(
        skills_dir / "14-seo-core-web-vitals/code/scripts/pagespeed_client.py",
        ["--url", url], "pagespeed_client"
    )
    stages["core_web_vitals"] = {
        "score": extract_score(cwv_result),
        "weight": WEIGHTS["core_web_vitals"],
        "issues": extract_issues(cwv_result),
        "raw": cwv_result,
    }

    # Stage 4: Schema Validation
    if not args.json:
        print("[4/6] Running Schema validation...")
    schema_result = run_script(
        skills_dir / "16-seo-schema-validator/code/scripts/schema_validator.py",
        ["--url", url], "schema_validator"
    )
    stages["schema"] = {
        "score": extract_score(schema_result),
        "weight": WEIGHTS["schema"],
        "issues": extract_issues(schema_result),
        "raw": schema_result,
    }

    # Stage 5: Local SEO — no script is run here; the stage is recorded as a
    # placeholder with score None (the analysis happens interactively via
    # the 18-seo-local-audit skill).
    if "local_seo" not in skipped:
        if not args.json:
            print("[5/6] Local SEO stage (prompt-driven, skipping in script mode)...")
        stages["local_seo"] = {
            "score": None,
            "weight": WEIGHTS["local_seo"],
            "issues": [],
            "raw": {"status": "prompt_driven", "note": "Local SEO requires interactive analysis via 18-seo-local-audit"},
        }
    else:
        if not args.json:
            print("[5/6] Skipping Local SEO...")

    # Stage 6: Search Console
    if "search_console" not in skipped:
        if not args.json:
            print("[6/6] Running Search Console analysis...")
        gsc_result = run_script(
            skills_dir / "15-seo-search-console/code/scripts/gsc_client.py",
            ["--url", url], "gsc_client"
        )
        stages["search_console"] = {
            "score": extract_score(gsc_result),
            "weight": WEIGHTS["search_console"],
            "issues": extract_issues(gsc_result),
            "raw": gsc_result,
        }
    else:
        if not args.json:
            print("[6/6] Skipping Search Console...")

    # Aggregate: weighted score plus derived grade/status/priority labels.
    health_score = compute_health_score(stages, skipped)
    grade, status = get_grade(health_score)
    priority = get_priority(health_score)

    # Collect issues flagged "error"/"critical" across all stages, tagging
    # each with the stage it came from.
    critical_issues = []
    for stage_name, stage_data in stages.items():
        for issue in stage_data.get("issues", []):
            if isinstance(issue, dict) and issue.get("type") in ("error", "critical"):
                critical_issues.append({**issue, "stage": stage_name})

    # Build the report. The per-stage section drops the bulky "raw" payloads;
    # the "notion" section feeds the Korean-language Notion report integration.
    output = {
        "url": url,
        "domain": domain,
        "audit_date": audit_date,
        "audit_id": audit_id,
        "health_score": health_score,
        "grade": grade,
        "status": status,
        "priority": priority,
        "skipped_stages": list(skipped),
        "stages": {
            k: {
                "score": v.get("score"),
                "weight": v.get("weight"),
                "issues": v.get("issues", []),
            }
            for k, v in stages.items()
        },
        "critical_issues": critical_issues,
        "notion": {
            "database_id": "2c8581e5-8a1e-8035-880b-e38cefc2f3ef",
            "title": f"종합 SEO 감사 보고서 - {domain} - {audit_date}",
            "category": "Comprehensive Audit",
            "priority": priority,
            "audit_id": audit_id,
        },
    }

    # ensure_ascii=False keeps the Korean Notion title human-readable.
    print(json.dumps(output, ensure_ascii=False, indent=2))
|
|
|
|
|
|
# Allow use both as a standalone script and as an importable module.
if __name__ == "__main__":
    main()
|