Add SEO skills 33-34 and fix bugs in skills 19-34

New skills:
- Skill 33: Site migration planner with redirect mapping and monitoring
- Skill 34: Reporting dashboard with HTML charts and Korean executive reports

Bug fixes (Skill 34 - report_aggregator.py; example input below):
- Add audit_type fallback for skill identification (was only using audit_id prefix)
- Extract health scores from nested data dict (technical_score, onpage_score, etc.)
- Support subdomain matching in domain filter (blog.ourdigital.org matches ourdigital.org)
- Skip self-referencing DASH- aggregated reports
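
A minimal hypothetical output that exercises the first three fixes (field names
match report_aggregator.py; the values are invented):

    sample = {
        "audit_id": "run-20260213-007",        # no known prefix, so prefix matching fails
        "audit_type": "technical",             # fallback now resolves this to skill 12
        "url": "https://blog.ourdigital.org",  # subdomain passes the ourdigital.org filter
        "data": {"technical_score": 78.5},     # score is now read from the nested dict
    }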

Bug fixes (Skill 20 - naver_serp_analyzer.py; detector sketch below):
- Remove VIEW tab selectors (removed by Naver in 2026)
- Add new section detectors: books (도서), shortform (숏폼), influencer (인플루언서)
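
A sketch of the new detectors (the dict shape and selector strings are
illustrative placeholders, not Naver's actual markup):

    SECTION_DETECTORS = {
        "books":      {"label": "도서",       "selector": "section.sp_book"},
        "shortform":  {"label": "숏폼",       "selector": "section.sp_shortform"},
        "influencer": {"label": "인플루언서", "selector": "section.sp_influencer"},
    }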

Improvements (Skill 34 - dashboard/executive report; mapping sketch below):
- Add Korean category labels for Chart.js charts (기술 SEO, 온페이지, etc.)
- Add Korean trend labels (개선 중 ↑, 안정 →, 하락 중 ↓)
- Add English→Korean issue description translation layer (20 common patterns)
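
Sketch of the three mapping layers (abridged; the names are assumed, and the
real translation table holds the full 20 patterns):

    CATEGORY_LABELS_KO = {"technical": "기술 SEO", "on_page": "온페이지"}
    TREND_LABELS_KO = {"improving": "개선 중 ↑", "stable": "안정 →", "declining": "하락 중 ↓"}
    ISSUE_PATTERNS_KO = [
        ("missing meta description", "메타 설명 누락"),
        ("broken internal link", "내부 링크 깨짐"),
    ]

    def translate_issue(desc: str) -> str:
        # First matching English pattern wins; unmatched text passes through unchanged.
        for pattern, ko in ISSUE_PATTERNS_KO:
            if pattern in desc.lower():
                return ko
        return desc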

Documentation improvements:
- Add Korean triggers to 4 skill descriptions (19, 25, 28, 31)
- Expand Skill 32 SKILL.md from 40→143 lines (quality score was 6/10; added workflow, output format, and limitations sections)
- Add output format examples to Skills 27 and 28 SKILL.md
- Add limitations sections to Skills 27 and 28
- Update README.md, CLAUDE.md, AGENTS.md for skills 33-34

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
commit d2d0a2d460
parent dbfaa883cd
2026-02-14 00:01:00 +09:00
37 changed files with 5462 additions and 56 deletions


@@ -0,0 +1,744 @@
"""
Report Aggregator - Collect and normalize outputs from all SEO skills
=====================================================================
Purpose: Scan for recent audit outputs from skills 11-33, normalize data
formats, merge findings by domain/date, compute cross-skill health
scores, and identify top-priority issues across all audits.
Python: 3.10+
Usage:
python report_aggregator.py --domain https://example.com --json
python report_aggregator.py --domain https://example.com --output-dir ./audit_outputs --json
python report_aggregator.py --domain https://example.com --from 2025-01-01 --to 2025-03-31 --json
python report_aggregator.py --domain https://example.com --json --output report.json
"""
import argparse
import asyncio
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime, date
from pathlib import Path
from typing import Any
from urllib.parse import urlparse
from base_client import BaseAsyncClient
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Skill registry
# ---------------------------------------------------------------------------
SKILL_REGISTRY = {
11: {"name": "comprehensive-audit", "category": "comprehensive", "weight": 1.0},
12: {"name": "technical-audit", "category": "technical", "weight": 0.20},
13: {"name": "on-page-audit", "category": "on_page", "weight": 0.20},
14: {"name": "core-web-vitals", "category": "performance", "weight": 0.25},
15: {"name": "search-console", "category": "search_console", "weight": 0.10},
16: {"name": "schema-validator", "category": "schema", "weight": 0.15},
17: {"name": "schema-generator", "category": "schema", "weight": 0.10},
18: {"name": "local-audit", "category": "local", "weight": 0.10},
19: {"name": "keyword-strategy", "category": "keywords", "weight": 0.15},
20: {"name": "serp-analysis", "category": "keywords", "weight": 0.10},
21: {"name": "position-tracking", "category": "keywords", "weight": 0.15},
22: {"name": "link-building", "category": "links", "weight": 0.15},
23: {"name": "content-strategy", "category": "content", "weight": 0.15},
24: {"name": "ecommerce-seo", "category": "ecommerce", "weight": 0.10},
25: {"name": "kpi-framework", "category": "kpi", "weight": 0.20},
26: {"name": "international-seo", "category": "international", "weight": 0.10},
27: {"name": "ai-visibility", "category": "ai_search", "weight": 0.10},
28: {"name": "knowledge-graph", "category": "entity_seo", "weight": 0.10},
31: {"name": "competitor-intel", "category": "competitor", "weight": 0.15},
32: {"name": "crawl-budget", "category": "technical", "weight": 0.10},
    33: {"name": "site-migration", "category": "technical", "weight": 0.10},
}
CATEGORY_WEIGHTS = {
"technical": 0.20,
"on_page": 0.15,
"performance": 0.15,
"content": 0.10,
"links": 0.10,
"local": 0.05,
"keywords": 0.10,
"competitor": 0.05,
"schema": 0.05,
"kpi": 0.05,
}
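# Categories absent from this table (e.g. ai_search, entity_seo) fall back to a
# default 0.05 weight in compute_cross_skill_health, which also normalizes by the
# sum of weights actually present, so the values need not sum to 1.0.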
# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------
@dataclass
class SkillOutput:
"""Normalized output from a single SEO skill."""
skill_id: int = 0
skill_name: str = ""
domain: str = ""
audit_date: str = ""
category: str = ""
data: dict[str, Any] = field(default_factory=dict)
health_score: float = 0.0
issues: list[dict[str, Any]] = field(default_factory=list)
wins: list[dict[str, Any]] = field(default_factory=list)
source_file: str = ""
@dataclass
class AggregatedReport:
"""Full aggregated report from all SEO skill outputs."""
domain: str = ""
report_date: str = ""
skills_included: list[dict[str, Any]] = field(default_factory=list)
overall_health: float = 0.0
health_trend: str = "stable"
category_scores: dict[str, float] = field(default_factory=dict)
top_issues: list[dict[str, Any]] = field(default_factory=list)
top_wins: list[dict[str, Any]] = field(default_factory=list)
timeline: list[dict[str, Any]] = field(default_factory=list)
raw_outputs: list[dict[str, Any]] = field(default_factory=list)
audit_id: str = ""
timestamp: str = ""
errors: list[str] = field(default_factory=list)
# ---------------------------------------------------------------------------
# Aggregator
# ---------------------------------------------------------------------------
class ReportAggregator(BaseAsyncClient):
"""Aggregate outputs from all SEO skills into unified reports."""
NOTION_DB_ID = "2c8581e5-8a1e-8035-880b-e38cefc2f3ef"
def __init__(self):
super().__init__(max_concurrent=5, requests_per_second=2.0)
@staticmethod
def _extract_domain(url: str) -> str:
"""Extract bare domain from URL or return as-is if already bare."""
if "://" in url:
parsed = urlparse(url)
return parsed.netloc.lower().replace("www.", "")
return url.lower().replace("www.", "")
@staticmethod
def _generate_audit_id() -> str:
"""Generate audit ID in DASH-YYYYMMDD-NNN format."""
now = datetime.now()
return f"DASH-{now.strftime('%Y%m%d')}-001"
def scan_local_outputs(
self,
output_dir: str,
domain: str | None = None,
date_from: str | None = None,
date_to: str | None = None,
) -> list[SkillOutput]:
"""Find JSON output files from other SEO skills in a directory.
Scans for files matching patterns from skills 11-33 and parses
them into normalized SkillOutput objects.
"""
outputs: list[SkillOutput] = []
output_path = Path(output_dir)
if not output_path.exists():
self.logger.warning(f"Output directory not found: {output_dir}")
return outputs
# Scan for JSON files matching skill output patterns
json_files = list(output_path.rglob("*.json"))
self.logger.info(f"Found {len(json_files)} JSON files in {output_dir}")
for json_file in json_files:
try:
data = json.loads(json_file.read_text(encoding="utf-8"))
# Attempt to identify which skill produced this output
skill_output = self._identify_and_parse(data, str(json_file))
if skill_output is None:
continue
# Filter by domain if specified (supports subdomains)
if domain:
target_domain = self._extract_domain(domain)
if skill_output.domain:
file_domain = skill_output.domain
# Match exact domain OR subdomains (e.g., blog.example.com matches example.com)
if file_domain != target_domain and not file_domain.endswith("." + target_domain):
continue
# Filter by date range
if date_from and skill_output.audit_date < date_from:
continue
if date_to and skill_output.audit_date > date_to:
continue
outputs.append(skill_output)
self.logger.info(
f"Parsed output from skill {skill_output.skill_id} "
f"({skill_output.skill_name}): {json_file.name}"
)
except (json.JSONDecodeError, KeyError, TypeError) as e:
self.logger.warning(f"Could not parse {json_file}: {e}")
self.logger.info(f"Successfully parsed {len(outputs)} skill outputs")
return outputs
def _identify_and_parse(
self, data: dict[str, Any], source_file: str
) -> SkillOutput | None:
"""Identify which skill produced the output and parse it."""
skill_output = SkillOutput(source_file=source_file)
# Strategy 1: Parse skill from audit_id prefix (e.g., KPI-20250115-001)
audit_id = data.get("audit_id", "")
if isinstance(audit_id, str):
prefix_map = {
"COMP": 11, "TECH": 12, "PAGE": 13, "CWV": 14,
"GSC": 15, "SCHEMA": 16, "LOCAL": 18, "KW": 19,
"SERP": 20, "RANK": 21, "LINK": 22, "CONTENT": 23,
"ECOM": 24, "KPI": 25, "INTL": 26, "AI": 27,
"KG": 28, "COMPET": 31, "CRAWL": 32, "MIGR": 33,
"DASH": None, # Skip self-referencing dashboard reports
}
            # Check longer prefixes first so COMPET- is not mis-matched as COMP-
            for prefix, skill_id in sorted(prefix_map.items(), key=lambda kv: len(kv[0]), reverse=True):
if audit_id.startswith(prefix):
if skill_id is None:
return None # Skip aggregated reports
skill_info = SKILL_REGISTRY.get(skill_id, {})
skill_output.skill_id = skill_id
skill_output.skill_name = skill_info.get("name", "unknown")
skill_output.category = skill_info.get("category", "unknown")
break
# Strategy 2: Fallback to audit_type field (used by our-seo-agent outputs)
if not skill_output.skill_id:
audit_type = data.get("audit_type", "")
if isinstance(audit_type, str) and audit_type:
type_map = {
"comprehensive": 11, "technical": 12, "onpage": 13,
"cwv": 14, "core-web-vitals": 14,
"gsc": 15, "search-console": 15,
"schema": 16, "local": 18,
"keyword": 19, "serp": 20, "position": 21,
"link": 22, "backlink": 22,
"content": 23, "ecommerce": 24, "kpi": 25,
"international": 26, "hreflang": 26,
"ai-visibility": 27, "knowledge-graph": 28, "entity": 28,
"competitor": 31, "crawl-budget": 32, "crawl": 32,
"migration": 33,
}
for type_key, skill_id in type_map.items():
if audit_type.lower() == type_key:
skill_info = SKILL_REGISTRY.get(skill_id, {})
skill_output.skill_id = skill_id
skill_output.skill_name = skill_info.get("name", "unknown")
skill_output.category = skill_info.get("category", "unknown")
break
# Extract domain
for key in ("url", "target", "domain", "site"):
if key in data:
skill_output.domain = self._extract_domain(str(data[key]))
break
# Extract health score — check top-level first, then nested data dict
score_found = False
for key in ("health_score", "overall_health", "score"):
if key in data:
try:
skill_output.health_score = float(data[key])
score_found = True
except (ValueError, TypeError):
pass
break
if not score_found:
nested = data.get("data", {})
if isinstance(nested, dict):
for key in ("technical_score", "onpage_score", "schema_score",
"local_seo_score", "cwv_score", "performance_score",
"content_score", "link_score", "keyword_score",
"competitor_score", "efficiency_score",
"health_score", "overall_score", "score"):
                    val = nested.get(key)
                    if val is not None:
                        try:
                            skill_output.health_score = float(val)
                            break
                        except (ValueError, TypeError):
                            continue  # non-numeric value, try the next score key
# Extract audit date
for key in ("audit_date", "report_date", "timestamp", "found_date"):
if key in data:
date_str = str(data[key])[:10]
skill_output.audit_date = date_str
break
if not skill_output.audit_date:
skill_output.audit_date = date.today().isoformat()
# Extract issues
issues_raw = data.get("issues", data.get("critical_issues", []))
if isinstance(issues_raw, list):
for issue in issues_raw:
if isinstance(issue, dict):
skill_output.issues.append(issue)
elif isinstance(issue, str):
skill_output.issues.append({"description": issue, "severity": "medium"})
        # Extract wins (accepts "wins" or "top_wins")
wins_raw = data.get("wins", data.get("top_wins", []))
if isinstance(wins_raw, list):
for win in wins_raw:
if isinstance(win, dict):
skill_output.wins.append(win)
elif isinstance(win, str):
skill_output.wins.append({"description": win})
# Store full data
skill_output.data = data
# Skip if no useful data was extracted
if not skill_output.skill_id and not skill_output.domain:
return None
return skill_output
async def query_notion_audits(
self,
domain: str,
date_from: str | None = None,
date_to: str | None = None,
) -> list[SkillOutput]:
"""Fetch past audit entries from Notion SEO Audit Log database.
In production, this uses the Notion MCP tools to query the database.
Returns normalized SkillOutput objects.
"""
outputs: list[SkillOutput] = []
self.logger.info(
f"Querying Notion audits for {domain} "
f"(db: {self.NOTION_DB_ID}, from={date_from}, to={date_to})"
)
# In production, this would call:
# mcp__notion__query-database with filters for Site URL and Found Date
# For now, return empty list as placeholder
self.logger.info(
"Notion query is a placeholder; use MCP tools in Claude Desktop "
"or manually provide JSON files via --output-dir."
)
return outputs
def normalize_output(self, skill_output: SkillOutput) -> dict[str, Any]:
"""Normalize a skill output into a unified format."""
return {
"skill_id": skill_output.skill_id,
"skill_name": skill_output.skill_name,
"domain": skill_output.domain,
"audit_date": skill_output.audit_date,
"category": skill_output.category,
"health_score": skill_output.health_score,
"issues_count": len(skill_output.issues),
"wins_count": len(skill_output.wins),
"issues": skill_output.issues[:10],
"wins": skill_output.wins[:10],
}
def compute_cross_skill_health(
self, outputs: list[SkillOutput]
) -> tuple[float, dict[str, float]]:
"""Compute weighted overall health score across all skills.
Returns (overall_score, category_scores_dict).
"""
category_scores: dict[str, list[float]] = {}
for output in outputs:
cat = output.category
if cat and output.health_score > 0:
category_scores.setdefault(cat, []).append(output.health_score)
# Average scores per category
avg_category: dict[str, float] = {}
for cat, scores in category_scores.items():
avg_category[cat] = round(sum(scores) / len(scores), 1)
# Weighted overall score
total_weight = 0.0
weighted_sum = 0.0
for cat, avg_score in avg_category.items():
weight = CATEGORY_WEIGHTS.get(cat, 0.05)
weighted_sum += avg_score * weight
total_weight += weight
overall = round(weighted_sum / total_weight, 1) if total_weight > 0 else 0.0
return overall, avg_category
def identify_priorities(
self, outputs: list[SkillOutput]
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
"""Identify top issues and wins across all skill outputs.
Returns (top_issues, top_wins).
"""
all_issues: list[dict[str, Any]] = []
all_wins: list[dict[str, Any]] = []
severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
for output in outputs:
for issue in output.issues:
enriched = {
**issue,
"source_skill": output.skill_name,
"source_skill_id": output.skill_id,
"category": output.category,
}
all_issues.append(enriched)
for win in output.wins:
enriched = {
**win,
"source_skill": output.skill_name,
"source_skill_id": output.skill_id,
"category": output.category,
}
all_wins.append(enriched)
# Sort issues by severity
all_issues.sort(
key=lambda i: severity_order.get(
i.get("severity", "medium"), 2
)
)
return all_issues[:20], all_wins[:20]
def build_timeline(self, outputs: list[SkillOutput]) -> list[dict[str, Any]]:
"""Build an audit history timeline from all skill outputs."""
timeline: list[dict[str, Any]] = []
for output in outputs:
entry = {
"date": output.audit_date,
"skill": output.skill_name,
"skill_id": output.skill_id,
"health_score": output.health_score,
"category": output.category,
"issues_count": len(output.issues),
}
timeline.append(entry)
# Sort by date descending
timeline.sort(key=lambda e: e.get("date", ""), reverse=True)
return timeline
async def run(
self,
domain: str,
output_dir: str | None = None,
date_from: str | None = None,
date_to: str | None = None,
) -> AggregatedReport:
"""Orchestrate the full report aggregation pipeline."""
target_domain = self._extract_domain(domain)
report = AggregatedReport(
domain=target_domain,
report_date=date.today().isoformat(),
audit_id=self._generate_audit_id(),
timestamp=datetime.now().isoformat(),
)
all_outputs: list[SkillOutput] = []
# Step 1: Scan local outputs
if output_dir:
self.logger.info(f"Step 1/5: Scanning local outputs in {output_dir}...")
local_outputs = self.scan_local_outputs(
output_dir, domain=target_domain,
date_from=date_from, date_to=date_to,
)
all_outputs.extend(local_outputs)
else:
self.logger.info("Step 1/5: No output directory specified, skipping local scan.")
# Step 2: Query Notion for past audits
self.logger.info("Step 2/5: Querying Notion for past audits...")
try:
notion_outputs = await self.query_notion_audits(
domain=target_domain,
date_from=date_from,
date_to=date_to,
)
all_outputs.extend(notion_outputs)
except Exception as e:
msg = f"Notion query error: {e}"
self.logger.error(msg)
report.errors.append(msg)
if not all_outputs:
self.logger.warning(
"No skill outputs found. Provide --output-dir with JSON files "
"from SEO skills 11-33, or ensure Notion audit log has entries."
)
report.errors.append("No skill outputs found to aggregate.")
return report
# Step 3: Normalize and compute health scores
self.logger.info(
f"Step 3/5: Normalizing {len(all_outputs)} skill outputs..."
)
report.skills_included = [
{
"skill_id": o.skill_id,
"skill_name": o.skill_name,
"audit_date": o.audit_date,
}
for o in all_outputs
]
report.raw_outputs = [self.normalize_output(o) for o in all_outputs]
overall_health, category_scores = self.compute_cross_skill_health(all_outputs)
report.overall_health = overall_health
report.category_scores = category_scores
# Determine health trend from timeline
scores_by_date = sorted(
[(o.audit_date, o.health_score) for o in all_outputs if o.health_score > 0],
key=lambda x: x[0],
)
if len(scores_by_date) >= 2:
            half = len(scores_by_date) // 2
            older_avg = sum(s for _, s in scores_by_date[:half]) / max(half, 1)
            newer_avg = sum(s for _, s in scores_by_date[half:]) / max(len(scores_by_date) - half, 1)
if newer_avg > older_avg + 3:
report.health_trend = "improving"
elif newer_avg < older_avg - 3:
report.health_trend = "declining"
else:
report.health_trend = "stable"
# Step 4: Identify priorities
self.logger.info("Step 4/5: Identifying top issues and wins...")
top_issues, top_wins = self.identify_priorities(all_outputs)
report.top_issues = top_issues
report.top_wins = top_wins
# Step 5: Build timeline
self.logger.info("Step 5/5: Building audit history timeline...")
report.timeline = self.build_timeline(all_outputs)
self.logger.info(
f"Aggregation complete: {len(all_outputs)} skills, "
f"health={report.overall_health}/100, "
f"trend={report.health_trend}, "
f"issues={len(report.top_issues)}, wins={len(report.top_wins)}"
)
return report
# ---------------------------------------------------------------------------
# Output formatting
# ---------------------------------------------------------------------------
def _format_text_report(report: AggregatedReport) -> str:
"""Format aggregated report as human-readable text."""
lines: list[str] = []
lines.append("=" * 70)
lines.append(" SEO REPORTING DASHBOARD - AGGREGATED REPORT")
lines.append(f" Domain: {report.domain}")
lines.append(f" Report Date: {report.report_date}")
lines.append(f" Audit ID: {report.audit_id}")
lines.append("=" * 70)
# Health score
lines.append("")
lines.append(f" Overall Health: {report.overall_health}/100 ({report.health_trend})")
lines.append("-" * 50)
# Category scores
if report.category_scores:
lines.append("")
lines.append("--- CATEGORY SCORES ---")
for cat, score in sorted(
report.category_scores.items(), key=lambda x: x[1], reverse=True
):
bar = "#" * int(score / 5) + "." * (20 - int(score / 5))
lines.append(f" {cat:<20} [{bar}] {score:.1f}/100")
# Skills included
if report.skills_included:
lines.append("")
lines.append("--- SKILLS INCLUDED ---")
for skill in report.skills_included:
lines.append(
f" [{skill['skill_id']:>2}] {skill['skill_name']:<30} "
f"({skill['audit_date']})"
)
# Top issues
if report.top_issues:
lines.append("")
lines.append("--- TOP ISSUES ---")
for i, issue in enumerate(report.top_issues[:10], 1):
severity = issue.get("severity", "medium").upper()
desc = issue.get("description", "No description")
cat = issue.get("category", "")
lines.append(f" {i:>2}. [{severity}] ({cat}) {desc}")
# Top wins
if report.top_wins:
lines.append("")
lines.append("--- TOP WINS ---")
for i, win in enumerate(report.top_wins[:10], 1):
desc = win.get("description", "No description")
cat = win.get("category", "")
lines.append(f" {i:>2}. ({cat}) {desc}")
# Timeline
if report.timeline:
lines.append("")
lines.append("--- AUDIT TIMELINE ---")
lines.append(f" {'Date':<12} {'Skill':<25} {'Score':>8} {'Issues':>8}")
lines.append(" " + "-" * 55)
for entry in report.timeline[:15]:
lines.append(
f" {entry['date']:<12} {entry['skill']:<25} "
f"{entry['health_score']:>7.1f} {entry['issues_count']:>7}"
)
# Errors
if report.errors:
lines.append("")
lines.append("--- ERRORS ---")
for err in report.errors:
lines.append(f" - {err}")
lines.append("")
lines.append("=" * 70)
return "\n".join(lines)
def _serialize_report(report: AggregatedReport) -> dict:
"""Convert report to JSON-serializable dict."""
return {
"domain": report.domain,
"report_date": report.report_date,
"overall_health": report.overall_health,
"health_trend": report.health_trend,
"skills_included": report.skills_included,
"category_scores": report.category_scores,
"top_issues": report.top_issues,
"top_wins": report.top_wins,
"timeline": report.timeline,
"raw_outputs": report.raw_outputs,
"audit_id": report.audit_id,
"timestamp": report.timestamp,
"errors": report.errors if report.errors else None,
}
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="SEO Report Aggregator - Collect and normalize outputs from all SEO skills",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""\
Examples:
python report_aggregator.py --domain https://example.com --json
python report_aggregator.py --domain https://example.com --output-dir ./audit_outputs --json
python report_aggregator.py --domain https://example.com --from 2025-01-01 --to 2025-03-31 --json
""",
)
parser.add_argument(
"--domain",
required=True,
help="Target domain to aggregate reports for",
)
parser.add_argument(
"--output-dir",
type=str,
default=None,
help="Directory containing JSON outputs from SEO skills",
)
parser.add_argument(
"--from",
type=str,
default=None,
dest="date_from",
help="Start date for filtering (YYYY-MM-DD)",
)
parser.add_argument(
"--to",
type=str,
default=None,
dest="date_to",
help="End date for filtering (YYYY-MM-DD)",
)
parser.add_argument(
"--json",
action="store_true",
default=False,
help="Output in JSON format",
)
parser.add_argument(
"--output",
type=str,
default=None,
help="Save output to file path",
)
return parser.parse_args(argv)
async def async_main(args: argparse.Namespace) -> None:
aggregator = ReportAggregator()
report = await aggregator.run(
domain=args.domain,
output_dir=args.output_dir,
date_from=args.date_from,
date_to=args.date_to,
)
if args.json:
output_str = json.dumps(
_serialize_report(report), indent=2, ensure_ascii=False
)
else:
output_str = _format_text_report(report)
if args.output:
Path(args.output).write_text(output_str, encoding="utf-8")
logger.info(f"Report saved to {args.output}")
else:
print(output_str)
aggregator.print_stats()
def main() -> None:
args = parse_args()
asyncio.run(async_main(args))
if __name__ == "__main__":
main()