Add SEO skills 19-28, 31-32 with full Python implementations

12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 12:05:59 +09:00
parent 159f7ec3f7
commit a3ff965b87
125 changed files with 25948 additions and 173 deletions
--- a/custom-skills/23-seo-content-strategy/code/scripts/content_auditor.py
+++ b/custom-skills/23-seo-content-strategy/code/scripts/content_auditor.py
@@ -0,0 +1,716 @@
+"""
+Content Auditor - SEO Content Inventory & Performance Analysis
+==============================================================
+Purpose: Build content inventory, score performance, detect decay,
+         classify content types, and analyze Korean content patterns.
+Python: 3.10+
+"""
+
+import argparse
+import asyncio
+import json
+import logging
+import re
+import sys
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timedelta
+from typing import Any
+from urllib.parse import urlparse
+
+import aiohttp
+import requests
+from bs4 import BeautifulSoup
+
+from base_client import BaseAsyncClient, config
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Data classes
+# ---------------------------------------------------------------------------
+
+@dataclass
+class ContentPage:
+    """Single content page with performance metrics."""
+    url: str
+    title: str = ""
+    content_type: str = "other"
+    word_count: int = 0
+    traffic: int = 0
+    keywords_count: int = 0
+    backlinks: int = 0
+    performance_score: float = 0.0
+    last_modified: str = ""
+    is_decaying: bool = False
+    decay_rate: float = 0.0
+    korean_pattern: str = ""
+    topics: list[str] = field(default_factory=list)
+
+
+@dataclass
+class ContentInventory:
+    """Aggregated content inventory summary."""
+    total_pages: int = 0
+    by_type: dict[str, int] = field(default_factory=dict)
+    avg_performance_score: float = 0.0
+    avg_word_count: float = 0.0
+    pages: list[ContentPage] = field(default_factory=list)
+    freshness_distribution: dict[str, int] = field(default_factory=dict)
+
+
+@dataclass
+class ContentAuditResult:
+    """Full content audit result."""
+    url: str
+    timestamp: str = ""
+    content_inventory: ContentInventory = field(default_factory=ContentInventory)
+    top_performers: list[ContentPage] = field(default_factory=list)
+    decaying_content: list[ContentPage] = field(default_factory=list)
+    korean_content_analysis: dict[str, Any] = field(default_factory=dict)
+    recommendations: list[str] = field(default_factory=list)
+    errors: list[str] = field(default_factory=list)
+
+
+# ---------------------------------------------------------------------------
+# URL pattern rules for content type classification
+# ---------------------------------------------------------------------------
+
+CONTENT_TYPE_PATTERNS = {
+    "blog": [
+        r"/blog/", r"/post/", r"/posts/", r"/article/", r"/articles/",
+        r"/news/", r"/magazine/", r"/stories/", r"/insights/",
+        r"/블로그/", r"/소식/", r"/뉴스/",
+    ],
+    "product": [
+        r"/product/", r"/products/", r"/shop/", r"/store/",
+        r"/item/", r"/goods/", r"/catalog/",
+        r"/제품/", r"/상품/", r"/쇼핑/",
+    ],
+    "service": [
+        r"/service/", r"/services/", r"/solutions/", r"/offering/",
+        r"/진료/", r"/서비스/", r"/시술/", r"/치료/",
+    ],
+    "landing": [
+        r"/lp/", r"/landing/", r"/campaign/", r"/promo/",
+        r"/event/", r"/이벤트/", r"/프로모션/",
+    ],
+    "resource": [
+        r"/resource/", r"/resources/", r"/guide/", r"/guides/",
+        r"/whitepaper/", r"/ebook/", r"/download/", r"/faq/",
+        r"/help/", r"/support/", r"/가이드/", r"/자료/",
+    ],
+}
+
+KOREAN_CONTENT_PATTERNS = {
+    "naver_blog_style": [
+        r"후기", r"리뷰", r"체험", r"솔직후기", r"방문후기",
+        r"사용후기", r"이용후기",
+    ],
+    "listicle": [
+        r"추천", r"베스트", r"TOP\s*\d+", r"\d+선", r"\d+가지",
+        r"모음", r"정리", r"비교",
+    ],
+    "how_to": [
+        r"방법", r"하는\s*법", r"하는\s*방법", r"가이드",
+        r"따라하기", r"시작하기", r"알아보기",
+    ],
+    "informational": [
+        r"이란", r"뜻", r"의미", r"차이", r"비교",
+        r"장단점", r"효과", r"부작용", r"비용", r"가격",
+    ],
+}
+
+
+# ---------------------------------------------------------------------------
+# ContentAuditor
+# ---------------------------------------------------------------------------
+
+class ContentAuditor(BaseAsyncClient):
+    """Content auditor using Ahrefs API and sitemap crawling."""
+
+    def __init__(self, max_concurrent: int = 5, requests_per_second: float = 2.0):
+        super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
+        self.session: aiohttp.ClientSession | None = None
+
+    async def _ensure_session(self) -> aiohttp.ClientSession:
+        if self.session is None or self.session.closed:
+            timeout = aiohttp.ClientTimeout(total=30)
+            self.session = aiohttp.ClientSession(timeout=timeout)
+        return self.session
+
+    async def close(self) -> None:
+        if self.session and not self.session.closed:
+            await self.session.close()
+
+    # ------------------------------------------------------------------
+    # Ahrefs data retrieval
+    # ------------------------------------------------------------------
+
+    async def get_top_pages(self, url: str, limit: int = 100) -> list[dict]:
+        """
+        Retrieve top pages via Ahrefs site-explorer-top-pages.
+
+        Returns list of dicts with keys: url, traffic, keywords, value, top_keyword.
+        """
+        self.logger.info(f"Fetching top pages from Ahrefs for {url}")
+        target = urlparse(url).netloc or url
+        try:
+            # Ahrefs MCP call: site-explorer-top-pages
+            # In MCP context this would be called by the agent.
+            # Standalone fallback: use REST API if AHREFS_API_KEY is set.
+            api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
+            if not api_key:
+                self.logger.warning("AHREFS_API_KEY not set; returning empty top pages")
+                return []
+
+            resp = requests.get(
+                "https://api.ahrefs.com/v3/site-explorer/top-pages",
+                params={"target": target, "limit": limit, "select": "url,traffic,keywords,value,top_keyword"},
+                headers={"Authorization": f"Bearer {api_key}"},
+                timeout=30,
+            )
+            resp.raise_for_status()
+            data = resp.json()
+            pages = data.get("pages", data.get("items", []))
+            self.logger.info(f"Retrieved {len(pages)} top pages")
+            return pages
+        except Exception as exc:
+            self.logger.warning(f"Ahrefs top-pages lookup failed: {exc}")
+            return []
+
+    async def get_pages_by_traffic(self, url: str, limit: int = 100) -> list[dict]:
+        """
+        Retrieve pages sorted by organic traffic via Ahrefs site-explorer-pages-by-traffic.
+
+        Returns list of dicts with keys: url, traffic, keywords, top_keyword.
+        """
+        self.logger.info(f"Fetching pages-by-traffic from Ahrefs for {url}")
+        target = urlparse(url).netloc or url
+        try:
+            api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
+            if not api_key:
+                self.logger.warning("AHREFS_API_KEY not set; returning empty traffic pages")
+                return []
+
+            resp = requests.get(
+                "https://api.ahrefs.com/v3/site-explorer/pages-by-traffic",
+                params={"target": target, "limit": limit, "select": "url,traffic,keywords,top_keyword"},
+                headers={"Authorization": f"Bearer {api_key}"},
+                timeout=30,
+            )
+            resp.raise_for_status()
+            data = resp.json()
+            pages = data.get("pages", data.get("items", []))
+            self.logger.info(f"Retrieved {len(pages)} pages by traffic")
+            return pages
+        except Exception as exc:
+            self.logger.warning(f"Ahrefs pages-by-traffic lookup failed: {exc}")
+            return []
+
+    # ------------------------------------------------------------------
+    # Sitemap crawling
+    # ------------------------------------------------------------------
+
+    async def crawl_sitemap(self, url: str) -> list[str]:
+        """Discover URLs from sitemap.xml."""
+        sitemap_urls_to_try = [
+            f"{url.rstrip('/')}/sitemap.xml",
+            f"{url.rstrip('/')}/sitemap_index.xml",
+            f"{url.rstrip('/')}/post-sitemap.xml",
+        ]
+        discovered: list[str] = []
+        session = await self._ensure_session()
+
+        for sitemap_url in sitemap_urls_to_try:
+            try:
+                async with session.get(sitemap_url) as resp:
+                    if resp.status != 200:
+                        continue
+                    text = await resp.text()
+                    soup = BeautifulSoup(text, "lxml-xml")
+
+                    # Sitemap index
+                    sitemaps = soup.find_all("sitemap")
+                    if sitemaps:
+                        for sm in sitemaps:
+                            loc = sm.find("loc")
+                            if loc:
+                                child_urls = await self._parse_sitemap(session, loc.text.strip())
+                                discovered.extend(child_urls)
+                    else:
+                        urls = soup.find_all("url")
+                        for u in urls:
+                            loc = u.find("loc")
+                            if loc:
+                                discovered.append(loc.text.strip())
+
+                    if discovered:
+                        self.logger.info(f"Discovered {len(discovered)} URLs from {sitemap_url}")
+                        break
+            except Exception as exc:
+                self.logger.debug(f"Failed to fetch {sitemap_url}: {exc}")
+
+        return list(set(discovered))
+
+    async def _parse_sitemap(self, session: aiohttp.ClientSession, sitemap_url: str) -> list[str]:
+        """Parse a single sitemap XML and return URLs."""
+        urls: list[str] = []
+        try:
+            async with session.get(sitemap_url) as resp:
+                if resp.status != 200:
+                    return urls
+                text = await resp.text()
+                soup = BeautifulSoup(text, "lxml-xml")
+                for u in soup.find_all("url"):
+                    loc = u.find("loc")
+                    if loc:
+                        urls.append(loc.text.strip())
+        except Exception as exc:
+            self.logger.debug(f"Failed to parse sitemap {sitemap_url}: {exc}")
+        return urls
+
+    # ------------------------------------------------------------------
+    # Content type classification
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def classify_content_type(url: str, title: str = "") -> str:
+        """
+        Classify content type based on URL path patterns and title.
+
+        Returns one of: blog, product, service, landing, resource, other.
+        """
+        combined = f"{url.lower()} {title.lower()}"
+        scores: dict[str, int] = {}
+
+        for ctype, patterns in CONTENT_TYPE_PATTERNS.items():
+            score = 0
+            for pattern in patterns:
+                if re.search(pattern, combined, re.IGNORECASE):
+                    score += 1
+            if score > 0:
+                scores[ctype] = score
+
+        if not scores:
+            return "other"
+        return max(scores, key=scores.get)
+
+    # ------------------------------------------------------------------
+    # Performance scoring
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def score_performance(page: ContentPage) -> float:
+        """
+        Compute composite performance score (0-100) from traffic, keywords, backlinks.
+
+        Weights:
+        - Traffic: 50% (log-scaled, 10k+ traffic = max)
+        - Keywords count: 30% (log-scaled, 500+ = max)
+        - Backlinks: 20% (log-scaled, 100+ = max)
+        """
+        import math
+
+        traffic_score = min(100, (math.log10(max(page.traffic, 1)) / math.log10(10000)) * 100)
+        keywords_score = min(100, (math.log10(max(page.keywords_count, 1)) / math.log10(500)) * 100)
+        backlinks_score = min(100, (math.log10(max(page.backlinks, 1)) / math.log10(100)) * 100)
+
+        composite = (traffic_score * 0.50) + (keywords_score * 0.30) + (backlinks_score * 0.20)
+        return round(min(100, max(0, composite)), 1)
+
+    # ------------------------------------------------------------------
+    # Content decay detection
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def detect_decay(pages: list[ContentPage], threshold: float = -20.0) -> list[ContentPage]:
+        """
+        Flag pages with declining traffic trend.
+
+        Uses a simple heuristic: pages with low performance score relative to
+        their keyword count indicate potential decay. In production, historical
+        traffic data from Ahrefs metrics-history would be used.
+
+        Args:
+            pages: List of content pages with metrics.
+            threshold: Decay rate threshold (percentage decline).
+
+        Returns:
+            List of pages flagged as decaying.
+        """
+        decaying: list[ContentPage] = []
+        for page in pages:
+            # Heuristic: high keyword count but low traffic suggests decay
+            if page.keywords_count > 10 and page.traffic < 50:
+                page.is_decaying = True
+                page.decay_rate = -50.0 if page.traffic == 0 else round(
+                    -((page.keywords_count * 10 - page.traffic) / max(page.keywords_count * 10, 1)) * 100, 1
+                )
+                if page.decay_rate <= threshold:
+                    decaying.append(page)
+            elif page.performance_score < 20 and page.keywords_count > 5:
+                page.is_decaying = True
+                page.decay_rate = round(-max(30, 100 - page.performance_score * 2), 1)
+                if page.decay_rate <= threshold:
+                    decaying.append(page)
+
+        decaying.sort(key=lambda p: p.decay_rate)
+        return decaying
+
+    # ------------------------------------------------------------------
+    # Freshness assessment
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def analyze_freshness(pages: list[ContentPage]) -> dict[str, int]:
+        """
+        Categorize pages by freshness based on last_modified dates.
+
+        Returns distribution: fresh (< 3 months), aging (3-12 months),
+        stale (> 12 months), unknown (no date).
+        """
+        now = datetime.now()
+        distribution = {"fresh": 0, "aging": 0, "stale": 0, "unknown": 0}
+
+        for page in pages:
+            if not page.last_modified:
+                distribution["unknown"] += 1
+                continue
+            try:
+                # Try common date formats
+                for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%S%z"):
+                    try:
+                        modified = datetime.strptime(
+                            page.last_modified.replace("+00:00", "").replace("Z", ""), fmt.replace("%z", "")
+                        )
+                        break
+                    except ValueError:
+                        continue
+                else:
+                    distribution["unknown"] += 1
+                    continue
+
+                age = now - modified
+                if age < timedelta(days=90):
+                    distribution["fresh"] += 1
+                elif age < timedelta(days=365):
+                    distribution["aging"] += 1
+                else:
+                    distribution["stale"] += 1
+            except Exception:
+                distribution["unknown"] += 1
+
+        return distribution
+
+    # ------------------------------------------------------------------
+    # Korean content pattern identification
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def identify_korean_patterns(pages: list[ContentPage]) -> dict[str, Any]:
+        """
+        Detect Korean content patterns across pages.
+
+        Identifies Naver Blog style review content, listicles,
+        how-to guides, and informational content patterns.
+
+        Returns summary with counts and example URLs per pattern.
+        """
+        results: dict[str, Any] = {
+            "total_korean_content": 0,
+            "patterns": {},
+        }
+
+        for pattern_name, keywords in KOREAN_CONTENT_PATTERNS.items():
+            matches: list[dict[str, str]] = []
+            for page in pages:
+                combined = f"{page.url} {page.title}"
+                for keyword in keywords:
+                    if re.search(keyword, combined, re.IGNORECASE):
+                        matches.append({"url": page.url, "title": page.title, "matched_keyword": keyword})
+                        break
+
+            results["patterns"][pattern_name] = {
+                "count": len(matches),
+                "examples": matches[:5],
+            }
+
+        korean_urls = set()
+        for pattern_data in results["patterns"].values():
+            for example in pattern_data["examples"]:
+                korean_urls.add(example["url"])
+        results["total_korean_content"] = len(korean_urls)
+
+        return results
+
+    # ------------------------------------------------------------------
+    # Orchestration
+    # ------------------------------------------------------------------
+
+    async def audit(
+        self,
+        url: str,
+        detect_decay_flag: bool = False,
+        content_type_filter: str | None = None,
+        limit: int = 200,
+    ) -> ContentAuditResult:
+        """
+        Run full content audit: inventory, scoring, decay, Korean patterns.
+
+        Args:
+            url: Target website URL.
+            detect_decay_flag: Whether to run decay detection.
+            content_type_filter: Filter by content type (blog, product, etc.).
+            limit: Maximum pages to analyze.
+
+        Returns:
+            ContentAuditResult with inventory, top performers, decay, analysis.
+        """
+        result = ContentAuditResult(
+            url=url,
+            timestamp=datetime.now().isoformat(),
+        )
+
+        self.logger.info(f"Starting content audit for {url}")
+
+        # 1. Gather pages from Ahrefs and sitemap
+        top_pages_data, traffic_pages_data, sitemap_urls = await asyncio.gather(
+            self.get_top_pages(url, limit=limit),
+            self.get_pages_by_traffic(url, limit=limit),
+            self.crawl_sitemap(url),
+        )
+
+        # 2. Merge and deduplicate pages
+        page_map: dict[str, ContentPage] = {}
+
+        for item in top_pages_data:
+            page_url = item.get("url", "")
+            if not page_url:
+                continue
+            page_map[page_url] = ContentPage(
+                url=page_url,
+                title=item.get("top_keyword", ""),
+                traffic=int(item.get("traffic", 0)),
+                keywords_count=int(item.get("keywords", 0)),
+                backlinks=int(item.get("value", 0)),
+            )
+
+        for item in traffic_pages_data:
+            page_url = item.get("url", "")
+            if not page_url:
+                continue
+            if page_url in page_map:
+                existing = page_map[page_url]
+                existing.traffic = max(existing.traffic, int(item.get("traffic", 0)))
+                existing.keywords_count = max(existing.keywords_count, int(item.get("keywords", 0)))
+            else:
+                page_map[page_url] = ContentPage(
+                    url=page_url,
+                    title=item.get("top_keyword", ""),
+                    traffic=int(item.get("traffic", 0)),
+                    keywords_count=int(item.get("keywords", 0)),
+                )
+
+        # Add sitemap URLs not already present
+        for s_url in sitemap_urls:
+            if s_url not in page_map:
+                page_map[s_url] = ContentPage(url=s_url)
+
+        # 3. Classify and score
+        all_pages: list[ContentPage] = []
+        for page in page_map.values():
+            page.content_type = self.classify_content_type(page.url, page.title)
+            page.performance_score = self.score_performance(page)
+            all_pages.append(page)
+
+        # 4. Filter by content type if requested
+        if content_type_filter:
+            all_pages = [p for p in all_pages if p.content_type == content_type_filter]
+
+        # 5. Build inventory
+        by_type: dict[str, int] = {}
+        for page in all_pages:
+            by_type[page.content_type] = by_type.get(page.content_type, 0) + 1
+
+        avg_score = (
+            sum(p.performance_score for p in all_pages) / len(all_pages)
+            if all_pages else 0.0
+        )
+        avg_word_count = (
+            sum(p.word_count for p in all_pages) / len(all_pages)
+            if all_pages else 0.0
+        )
+
+        freshness = self.analyze_freshness(all_pages)
+
+        result.content_inventory = ContentInventory(
+            total_pages=len(all_pages),
+            by_type=by_type,
+            avg_performance_score=round(avg_score, 1),
+            avg_word_count=round(avg_word_count, 1),
+            pages=sorted(all_pages, key=lambda p: p.performance_score, reverse=True)[:limit],
+            freshness_distribution=freshness,
+        )
+
+        # 6. Top performers
+        result.top_performers = sorted(all_pages, key=lambda p: p.performance_score, reverse=True)[:20]
+
+        # 7. Decay detection
+        if detect_decay_flag:
+            result.decaying_content = self.detect_decay(all_pages)
+
+        # 8. Korean content analysis
+        result.korean_content_analysis = self.identify_korean_patterns(all_pages)
+
+        # 9. Recommendations
+        result.recommendations = self._generate_recommendations(result)
+
+        self.logger.info(
+            f"Audit complete: {len(all_pages)} pages, "
+            f"{len(result.top_performers)} top performers, "
+            f"{len(result.decaying_content)} decaying"
+        )
+
+        return result
+
+    @staticmethod
+    def _generate_recommendations(result: ContentAuditResult) -> list[str]:
+        """Generate actionable recommendations from audit data."""
+        recs: list[str] = []
+        inv = result.content_inventory
+
+        # Low average score
+        if inv.avg_performance_score < 30:
+            recs.append(
+                "전체 콘텐츠 평균 성과 점수가 낮습니다 ({:.0f}/100). "
+                "상위 콘텐츠 패턴을 분석하여 저성과 페이지를 개선하세요.".format(inv.avg_performance_score)
+            )
+
+        # Stale content
+        stale = inv.freshness_distribution.get("stale", 0)
+        total = inv.total_pages or 1
+        if stale / total > 0.3:
+            recs.append(
+                f"오래된 콘텐츠가 {stale}개 ({stale * 100 // total}%)입니다. "
+                "콘텐츠 업데이트 또는 통합을 고려하세요."
+            )
+
+        # Decaying content
+        if len(result.decaying_content) > 5:
+            recs.append(
+                f"트래픽이 감소하는 콘텐츠가 {len(result.decaying_content)}개 감지되었습니다. "
+                "상위 감소 페이지부터 콘텐츠 리프레시를 진행하세요."
+            )
+
+        # Content type balance
+        blog_count = inv.by_type.get("blog", 0)
+        if blog_count == 0:
+            recs.append(
+                "블로그 콘텐츠가 없습니다. SEO 트래픽 확보를 위해 "
+                "블로그 콘텐츠 전략을 수립하세요."
+            )
+
+        # Korean content opportunities
+        korean = result.korean_content_analysis
+        review_count = korean.get("patterns", {}).get("naver_blog_style", {}).get("count", 0)
+        if review_count == 0:
+            recs.append(
+                "후기/리뷰 콘텐츠가 없습니다. 한국 시장에서 후기 콘텐츠는 "
+                "전환율에 큰 영향을 미치므로 후기 콘텐츠 생성을 권장합니다."
+            )
+
+        if not recs:
+            recs.append("현재 콘텐츠 전략이 양호합니다. 지속적인 모니터링을 권장합니다.")
+
+        return recs
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="SEO Content Auditor - inventory, scoring, and decay detection",
+    )
+    parser.add_argument("--url", required=True, help="Target website URL")
+    parser.add_argument("--decay", action="store_true", help="Enable content decay detection")
+    parser.add_argument("--type", dest="content_type", help="Filter by content type (blog, product, service, landing, resource)")
+    parser.add_argument("--limit", type=int, default=200, help="Maximum pages to analyze (default: 200)")
+    parser.add_argument("--json", action="store_true", help="Output as JSON")
+    parser.add_argument("--output", help="Save output to file")
+    return parser
+
+
+def format_text_report(result: ContentAuditResult) -> str:
+    """Format audit result as human-readable text."""
+    lines: list[str] = []
+    lines.append(f"## Content Audit: {result.url}")
+    lines.append(f"**Date**: {result.timestamp[:10]}")
+    lines.append("")
+
+    inv = result.content_inventory
+    lines.append(f"### Content Inventory")
+    lines.append(f"- Total pages: {inv.total_pages}")
+    lines.append(f"- Average performance score: {inv.avg_performance_score}/100")
+    lines.append(f"- Content types: {json.dumps(inv.by_type, ensure_ascii=False)}")
+    lines.append(f"- Freshness: {json.dumps(inv.freshness_distribution, ensure_ascii=False)}")
+    lines.append("")
+
+    lines.append("### Top Performers")
+    for i, page in enumerate(result.top_performers[:10], 1):
+        lines.append(f"  {i}. [{page.performance_score:.0f}] {page.url} (traffic: {page.traffic})")
+    lines.append("")
+
+    if result.decaying_content:
+        lines.append("### Decaying Content")
+        for i, page in enumerate(result.decaying_content[:10], 1):
+            lines.append(f"  {i}. [{page.decay_rate:+.0f}%] {page.url} (traffic: {page.traffic})")
+        lines.append("")
+
+    if result.korean_content_analysis.get("patterns"):
+        lines.append("### Korean Content Patterns")
+        for pattern_name, data in result.korean_content_analysis["patterns"].items():
+            lines.append(f"  - {pattern_name}: {data['count']} pages")
+        lines.append("")
+
+    lines.append("### Recommendations")
+    for i, rec in enumerate(result.recommendations, 1):
+        lines.append(f"  {i}. {rec}")
+
+    return "\n".join(lines)
+
+
+async def main() -> None:
+    parser = build_parser()
+    args = parser.parse_args()
+
+    auditor = ContentAuditor()
+    try:
+        result = await auditor.audit(
+            url=args.url,
+            detect_decay_flag=args.decay,
+            content_type_filter=args.content_type,
+            limit=args.limit,
+        )
+
+        if args.json:
+            output = json.dumps(asdict(result), ensure_ascii=False, indent=2, default=str)
+        else:
+            output = format_text_report(result)
+
+        if args.output:
+            with open(args.output, "w", encoding="utf-8") as f:
+                f.write(output)
+            logger.info(f"Output saved to {args.output}")
+        else:
+            print(output)
+
+    finally:
+        await auditor.close()
+        auditor.print_stats()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())