New skill that runs a 6-stage SEO audit pipeline (Technical, On-Page, Core Web Vitals, Schema, Local SEO, Search Console) and produces a unified health score (0-100) with weighted categories. Includes Python orchestrator script, slash command, and Notion integration for Korean audit reports. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
349 lines · 11 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
SEO Comprehensive Audit Orchestrator
|
|
|
|
Runs 6 sub-skill audits sequentially, merges results, and computes a weighted
|
|
health score (0-100).
|
|
|
|
Usage:
|
|
python seo_audit_orchestrator.py --url https://example.com
|
|
python seo_audit_orchestrator.py --url https://example.com --skip-local --skip-gsc
|
|
python seo_audit_orchestrator.py --url https://example.com --json
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
# Health score weights per audit stage (must sum to 1.0).
# Core Web Vitals carries the largest share; Local SEO and Search Console
# the smallest — those two stages can be skipped via CLI flags.
WEIGHTS = {
    "technical": 0.20,
    "on_page": 0.20,
    "core_web_vitals": 0.25,
    "schema": 0.15,
    "local_seo": 0.10,
    "search_console": 0.10,
}

# (minimum score, letter grade, status label), ordered descending.
# get_grade() returns the first entry whose threshold the score meets.
GRADE_THRESHOLDS = [
    (90, "A", "Excellent"),
    (80, "B+", "Good"),
    (70, "B", "Above Average"),
    (60, "C", "Needs Improvement"),
    (40, "D", "Poor"),
    (0, "F", "Critical"),
]
|
|
|
|
|
|
def get_repo_root():
    """Locate the root directory of the enclosing git repository.

    Tries ``git rev-parse --show-toplevel`` first. If git is not installed
    (FileNotFoundError) or the command fails (CalledProcessError), falls
    back to walking up from this script's directory looking for a ``.git``
    entry.

    Returns:
        Path: absolute path of the repository root.

    Exits:
        Calls sys.exit(1) after printing to stderr when no root is found.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True
        )
        return Path(result.stdout.strip())
    # FileNotFoundError: the git binary itself is missing from PATH —
    # the original only caught CalledProcessError, so a missing git
    # crashed instead of using the fallback walk below.
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Fallback: walk up from this script's location until we hit
        # a directory containing .git, or the filesystem root.
        current = Path(__file__).resolve().parent
        while current != current.parent:
            if (current / ".git").exists():
                return current
            current = current.parent
        print("Error: Could not find git repository root.", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
|
def run_script(script_path, args, stage_name, timeout=120):
    """Run a sub-skill script and return its parsed JSON output.

    Invokes ``script_path`` with the current interpreter, the given args,
    and a trailing ``--json`` flag, then parses stdout as JSON.

    Args:
        script_path (Path): sub-skill script to execute.
        args (list[str]): extra command-line arguments for the script.
        stage_name (str): human-readable stage label for error messages.
        timeout (int): seconds before the subprocess is killed
            (generalized from the previous hard-coded 120; default unchanged).

    Returns:
        dict: the script's JSON output on success, otherwise a uniform
        error dict with ``status``/``reason``/``score``/``issues`` keys
        so callers never have to special-case failures.
    """
    if not script_path.exists():
        return {
            "status": "skipped",
            "reason": f"Script not found: {script_path}",
            "score": None,
            "issues": [],
        }

    cmd = [sys.executable, str(script_path)] + args + ["--json"]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout
        )
        if result.returncode == 0 and result.stdout.strip():
            return json.loads(result.stdout.strip())
        else:
            return {
                "status": "error",
                "reason": result.stderr.strip() or f"Exit code {result.returncode}",
                "score": None,
                "issues": [],
            }
    except subprocess.TimeoutExpired:
        return {
            "status": "timeout",
            "reason": f"{stage_name} timed out after {timeout}s",
            "score": None,
            "issues": [],
        }
    except json.JSONDecodeError as e:
        return {
            "status": "error",
            "reason": f"Invalid JSON output: {e}",
            "score": None,
            "issues": [],
        }
    # Last-resort guard: a sub-skill failure must never abort the whole
    # pipeline; report it as a stage error instead.
    except Exception as e:
        return {
            "status": "error",
            "reason": str(e),
            "score": None,
            "issues": [],
        }
|
|
|
|
|
|
def extract_score(result):
    """Extract a 0-100 numeric score from a sub-skill result.

    Checks the common top-level keys (``score``, ``health_score``,
    ``overall_score``) in order, then a nested ``core_web_vitals.score``.
    Values are clamped to the 0-100 range.

    Args:
        result: sub-skill output; anything other than a dict yields None.

    Returns:
        int | float | None: clamped score, or None when no numeric score
        is present.
    """
    if not isinstance(result, dict):
        return None
    for key in ("score", "health_score", "overall_score"):
        value = result.get(key)
        # bool is a subclass of int — without this guard {"score": True}
        # would be clamped and returned as a bogus score of 1.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return min(100, max(0, value))
    # Nested Core Web Vitals score; type-checked so a non-numeric value
    # (e.g. "n/a") yields None instead of a TypeError from min/max.
    cwv = result.get("core_web_vitals")
    if isinstance(cwv, dict):
        value = cwv.get("score")
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return min(100, max(0, value))
    return None
|
|
|
|
|
|
def extract_issues(result):
    """Return the ``issues`` list from a sub-skill result.

    Yields an empty list when the result is not a dict, has no ``issues``
    key, or stores something other than a list under it.
    """
    if not isinstance(result, dict):
        return []
    issues = result.get("issues", [])
    return issues if isinstance(issues, list) else []
|
|
|
|
|
|
def compute_health_score(stages, skipped):
    """Compute the weighted 0-100 health score.

    Weights of stages that were skipped OR that produced no score
    (missing script, timeout, error, prompt-driven stage) are
    redistributed proportionally across the stages that actually scored.
    Previously an unscored stage kept its weight in the denominator and
    so counted as a silent zero, letting an orchestration failure drag
    down the site's score.

    Args:
        stages (dict): per-stage dicts containing at least a ``score`` key.
        skipped (set): stage names explicitly skipped via CLI flags.

    Returns:
        float | int: weighted score rounded to one decimal, or 0 when no
        stage produced a score.
    """
    # Only stages that are active AND actually produced a score share
    # the weight pool.
    scored_weights = {
        name: weight
        for name, weight in WEIGHTS.items()
        if name not in skipped and stages.get(name, {}).get("score") is not None
    }
    total_weight = sum(scored_weights.values())
    if total_weight == 0:
        return 0

    weighted_sum = sum(
        stages[name]["score"] * (weight / total_weight)
        for name, weight in scored_weights.items()
    )
    return round(weighted_sum, 1)
|
|
|
|
|
|
def get_grade(score):
    """Map a numeric score to its (grade, status) pair.

    Scans GRADE_THRESHOLDS (descending) and returns the first entry whose
    threshold the score meets; defensively falls back to ("F", "Critical").
    """
    return next(
        ((grade, status) for threshold, grade, status in GRADE_THRESHOLDS
         if score >= threshold),
        ("F", "Critical"),
    )
|
|
|
|
|
|
def get_priority(score):
    """Return the Notion priority label for a health score.

    Bands: <40 Critical, <60 High, <80 Medium, otherwise Low.
    """
    bands = ((40, "Critical"), (60, "High"), (80, "Medium"))
    for upper_bound, label in bands:
        if score < upper_bound:
            return label
    return "Low"
|
|
|
|
|
|
def main():
    """CLI entry point: run the 6-stage SEO audit pipeline and print a JSON report.

    Stages 1-4 (technical, on-page, Core Web Vitals, schema) shell out to
    sub-skill scripts under the skills directory; stage 5 (Local SEO) is
    prompt-driven and is only recorded as a placeholder here; stage 6
    (Search Console) shells out as well.  Progress messages print to
    stdout unless --json is given; the final JSON report always prints.
    """
    parser = argparse.ArgumentParser(description="Comprehensive SEO Audit Orchestrator")
    parser.add_argument("--url", required=True, help="URL to audit")
    parser.add_argument("--skip-local", action="store_true", help="Skip Local SEO stage")
    parser.add_argument("--skip-gsc", action="store_true", help="Skip Search Console stage")
    parser.add_argument("--json", action="store_true", help="Output JSON only")
    parser.add_argument("--skills-dir", help="Path to custom-skills directory")
    args = parser.parse_args()

    url = args.url
    # Fall back to the raw input when the URL has no scheme (netloc is "").
    domain = urlparse(url).netloc or url
    audit_date = datetime.now().strftime("%Y-%m-%d")
    # NOTE(review): the "-001" sequence suffix is hard-coded, so two audits
    # run on the same day share an ID — confirm whether that is acceptable.
    audit_id = f"COMP-{datetime.now().strftime('%Y%m%d')}-001"

    # Resolve skills directory: explicit flag wins, else <repo root>/custom-skills.
    if args.skills_dir:
        skills_dir = Path(args.skills_dir)
    else:
        repo_root = get_repo_root()
        skills_dir = repo_root / "custom-skills"

    # Stage names the user asked to skip; their weights are redistributed
    # inside compute_health_score().
    skipped = set()
    if args.skip_local:
        skipped.add("local_seo")
    if args.skip_gsc:
        skipped.add("search_console")

    stages = {}

    # Stage 1: Technical SEO — two scripts (robots + sitemap), averaged below.
    if not args.json:
        print("[1/6] Running Technical SEO audit...")
    robots_result = run_script(
        skills_dir / "12-seo-technical-audit/code/scripts/robots_checker.py",
        ["--url", url], "robots_checker"
    )
    # Sitemap assumed at the conventional root path — TODO confirm this
    # matches the target site (no robots.txt Sitemap: lookup is done here).
    sitemap_url = f"{url.rstrip('/')}/sitemap.xml"
    sitemap_result = run_script(
        skills_dir / "12-seo-technical-audit/code/scripts/sitemap_validator.py",
        ["--url", sitemap_url], "sitemap_validator"
    )
    tech_score = extract_score(robots_result)
    sitemap_score = extract_score(sitemap_result)
    # Average when both scripts scored; otherwise use whichever is available.
    if tech_score is not None and sitemap_score is not None:
        combined_tech_score = round((tech_score + sitemap_score) / 2)
    elif tech_score is not None:
        combined_tech_score = tech_score
    elif sitemap_score is not None:
        combined_tech_score = sitemap_score
    else:
        combined_tech_score = None
    stages["technical"] = {
        "score": combined_tech_score,
        "weight": WEIGHTS["technical"],
        "issues": extract_issues(robots_result) + extract_issues(sitemap_result),
        "raw": {"robots": robots_result, "sitemap": sitemap_result},
    }

    # Stage 2: On-Page SEO
    if not args.json:
        print("[2/6] Running On-Page SEO audit...")
    on_page_result = run_script(
        skills_dir / "13-seo-on-page-audit/code/scripts/page_analyzer.py",
        ["--url", url], "page_analyzer"
    )
    stages["on_page"] = {
        "score": extract_score(on_page_result),
        "weight": WEIGHTS["on_page"],
        "issues": extract_issues(on_page_result),
        "raw": on_page_result,
    }

    # Stage 3: Core Web Vitals
    if not args.json:
        print("[3/6] Running Core Web Vitals audit...")
    cwv_result = run_script(
        skills_dir / "14-seo-core-web-vitals/code/scripts/pagespeed_client.py",
        ["--url", url], "pagespeed_client"
    )
    stages["core_web_vitals"] = {
        "score": extract_score(cwv_result),
        "weight": WEIGHTS["core_web_vitals"],
        "issues": extract_issues(cwv_result),
        "raw": cwv_result,
    }

    # Stage 4: Schema Validation
    if not args.json:
        print("[4/6] Running Schema validation...")
    schema_result = run_script(
        skills_dir / "16-seo-schema-validator/code/scripts/schema_validator.py",
        ["--url", url], "schema_validator"
    )
    stages["schema"] = {
        "score": extract_score(schema_result),
        "weight": WEIGHTS["schema"],
        "issues": extract_issues(schema_result),
        "raw": schema_result,
    }

    # Stage 5: Local SEO — no script is run here; the stage is recorded as a
    # placeholder with score None (the analysis happens interactively via
    # the 18-seo-local-audit skill).
    if "local_seo" not in skipped:
        if not args.json:
            print("[5/6] Local SEO stage (prompt-driven, skipping in script mode)...")
        stages["local_seo"] = {
            "score": None,
            "weight": WEIGHTS["local_seo"],
            "issues": [],
            "raw": {"status": "prompt_driven", "note": "Local SEO requires interactive analysis via 18-seo-local-audit"},
        }
    else:
        if not args.json:
            print("[5/6] Skipping Local SEO...")

    # Stage 6: Search Console
    if "search_console" not in skipped:
        if not args.json:
            print("[6/6] Running Search Console analysis...")
        gsc_result = run_script(
            skills_dir / "15-seo-search-console/code/scripts/gsc_client.py",
            ["--url", url], "gsc_client"
        )
        stages["search_console"] = {
            "score": extract_score(gsc_result),
            "weight": WEIGHTS["search_console"],
            "issues": extract_issues(gsc_result),
            "raw": gsc_result,
        }
    else:
        if not args.json:
            print("[6/6] Skipping Search Console...")

    # Aggregate: weighted score plus derived grade/status/priority labels.
    health_score = compute_health_score(stages, skipped)
    grade, status = get_grade(health_score)
    priority = get_priority(health_score)

    # Collect issues flagged "error"/"critical" across all stages, tagging
    # each with the stage it came from.
    critical_issues = []
    for stage_name, stage_data in stages.items():
        for issue in stage_data.get("issues", []):
            if isinstance(issue, dict) and issue.get("type") in ("error", "critical"):
                critical_issues.append({**issue, "stage": stage_name})

    # Build the report. The per-stage section drops the bulky "raw" payloads;
    # the "notion" section feeds the Korean-language Notion report integration.
    output = {
        "url": url,
        "domain": domain,
        "audit_date": audit_date,
        "audit_id": audit_id,
        "health_score": health_score,
        "grade": grade,
        "status": status,
        "priority": priority,
        "skipped_stages": list(skipped),
        "stages": {
            k: {
                "score": v.get("score"),
                "weight": v.get("weight"),
                "issues": v.get("issues", []),
            }
            for k, v in stages.items()
        },
        "critical_issues": critical_issues,
        "notion": {
            "database_id": "2c8581e5-8a1e-8035-880b-e38cefc2f3ef",
            "title": f"종합 SEO 감사 보고서 - {domain} - {audit_date}",
            "category": "Comprehensive Audit",
            "priority": priority,
            "audit_id": audit_id,
        },
    }

    # ensure_ascii=False keeps the Korean Notion title human-readable.
    print(json.dumps(output, ensure_ascii=False, indent=2))
|
|
|
|
|
|
# Allow use both as a standalone script and as an importable module.
if __name__ == "__main__":
    main()
|