Add SEO skills 19-28, 31-32 with full Python implementations

12 new skills: Keyword Strategy, SERP Analysis, Position Tracking,
Link Building, Content Strategy, E-Commerce SEO, KPI Framework,
International SEO, AI Visibility, Knowledge Graph, Competitor Intel,
and Crawl Budget. ~20K lines of Python across 25 domain scripts.
Updated skill 11 pipeline table and repo CLAUDE.md.
Enhanced skill 18 local SEO workflow from jamie.clinic audit.

Note: Skill 26 hreflang_validator.py pending (content filter block).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 12:05:59 +09:00
parent 159f7ec3f7
commit a3ff965b87
125 changed files with 25948 additions and 173 deletions

File diff suppressed because it is too large.


@@ -0,0 +1,207 @@
"""
Base Client - Shared async client utilities
===========================================
Purpose: Rate-limited async operations for API clients
Python: 3.10+
"""
import asyncio
import logging
import os
from asyncio import Semaphore
from datetime import datetime
from typing import Any, Callable, TypeVar
from dotenv import load_dotenv
from tenacity import (
retry,
stop_after_attempt,
wait_exponential,
retry_if_exception_type,
)
# Load environment variables
load_dotenv()
# Logging setup
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
T = TypeVar("T")
class RateLimiter:
"""Rate limiter using token bucket algorithm."""
def __init__(self, rate: float, per: float = 1.0):
"""
Initialize rate limiter.
Args:
rate: Number of requests allowed
per: Time period in seconds (default: 1 second)
"""
self.rate = rate
self.per = per
self.tokens = rate
self.last_update = datetime.now()
self._lock = asyncio.Lock()
async def acquire(self) -> None:
"""Acquire a token, waiting if necessary."""
async with self._lock:
now = datetime.now()
elapsed = (now - self.last_update).total_seconds()
self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
self.last_update = now
            if self.tokens < 1:
                wait_time = (1 - self.tokens) * (self.per / self.rate)
                await asyncio.sleep(wait_time)
                # Consume the token that accrues during the sleep and reset
                # the refill clock; otherwise the next acquire() would count
                # the sleep interval a second time and exceed the rate.
                self.tokens = 0
                self.last_update = datetime.now()
            else:
                self.tokens -= 1
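# Example: with rate=3, per=1.0, a burst of three acquire() calls drains the
# bucket immediately; a fourth call finds tokens < 1 and sleeps roughly
# (1 - tokens) / 3 seconds while a single token refills.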
class BaseAsyncClient:
"""Base class for async API clients with rate limiting."""
def __init__(
self,
max_concurrent: int = 5,
requests_per_second: float = 3.0,
logger: logging.Logger | None = None,
):
"""
Initialize base client.
Args:
max_concurrent: Maximum concurrent requests
requests_per_second: Rate limit
logger: Logger instance
"""
self.semaphore = Semaphore(max_concurrent)
self.rate_limiter = RateLimiter(requests_per_second)
self.logger = logger or logging.getLogger(self.__class__.__name__)
self.stats = {
"requests": 0,
"success": 0,
"errors": 0,
"retries": 0,
}
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=2, max=10),
retry=retry_if_exception_type(Exception),
)
async def _rate_limited_request(
self,
coro: Callable[[], Any],
) -> Any:
"""Execute a request with rate limiting and retry."""
async with self.semaphore:
await self.rate_limiter.acquire()
self.stats["requests"] += 1
try:
result = await coro()
self.stats["success"] += 1
return result
except Exception as e:
self.stats["errors"] += 1
self.logger.error(f"Request failed: {e}")
raise
async def batch_requests(
self,
requests: list[Callable[[], Any]],
desc: str = "Processing",
) -> list[Any]:
"""Execute multiple requests concurrently."""
try:
from tqdm.asyncio import tqdm
has_tqdm = True
except ImportError:
has_tqdm = False
async def execute(req: Callable) -> Any:
try:
return await self._rate_limited_request(req)
except Exception as e:
return {"error": str(e)}
tasks = [execute(req) for req in requests]
if has_tqdm:
results = []
for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc):
result = await coro
results.append(result)
return results
else:
return await asyncio.gather(*tasks, return_exceptions=True)
def print_stats(self) -> None:
"""Print request statistics."""
self.logger.info("=" * 40)
self.logger.info("Request Statistics:")
self.logger.info(f" Total Requests: {self.stats['requests']}")
self.logger.info(f" Successful: {self.stats['success']}")
self.logger.info(f" Errors: {self.stats['errors']}")
self.logger.info("=" * 40)
class ConfigManager:
"""Manage API configuration and credentials."""
def __init__(self):
load_dotenv()
@property
def google_credentials_path(self) -> str | None:
"""Get Google service account credentials path."""
# Prefer SEO-specific credentials, fallback to general credentials
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
if os.path.exists(seo_creds):
return seo_creds
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
@property
def pagespeed_api_key(self) -> str | None:
"""Get PageSpeed Insights API key."""
return os.getenv("PAGESPEED_API_KEY")
@property
def custom_search_api_key(self) -> str | None:
"""Get Custom Search API key."""
return os.getenv("CUSTOM_SEARCH_API_KEY")
@property
def custom_search_engine_id(self) -> str | None:
"""Get Custom Search Engine ID."""
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
@property
def notion_token(self) -> str | None:
"""Get Notion API token."""
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
def validate_google_credentials(self) -> bool:
"""Validate Google credentials are configured."""
creds_path = self.google_credentials_path
if not creds_path:
return False
return os.path.exists(creds_path)
def get_required(self, key: str) -> str:
"""Get required environment variable or raise error."""
value = os.getenv(key)
if not value:
raise ValueError(f"Missing required environment variable: {key}")
return value
# Singleton config instance
config = ConfigManager()
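
A minimal usage sketch for BaseAsyncClient (hypothetical; demo() and fake_call are illustrations, not part of this commit):

import asyncio
from base_client import BaseAsyncClient

async def demo() -> None:
    client = BaseAsyncClient(max_concurrent=3, requests_per_second=2.0)

    async def fake_call(n: int) -> dict:
        await asyncio.sleep(0.1)  # stand-in for a real API request
        return {"item": n}

    # batch_requests takes zero-argument callables returning awaitables;
    # each call is wrapped with the semaphore, rate limiter, and retry.
    results = await client.batch_requests(
        [lambda n=n: fake_call(n) for n in range(5)], desc="demo"
    )
    print(results)
    client.print_stats()

if __name__ == "__main__":
    asyncio.run(demo())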


@@ -0,0 +1,802 @@
"""
Link Gap Finder - Competitor link gap analysis
===============================================
Purpose: Identify link building opportunities by finding domains that link
to competitors but not to the target site via Ahrefs MCP.
Python: 3.10+
Usage:
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --json
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --competitor https://comp2.com --min-dr 30 --json
"""
from __future__ import annotations
import argparse
import asyncio
import json
import logging
import sys
from dataclasses import dataclass, field, asdict
from datetime import datetime
from typing import Any
from urllib.parse import urlparse
import aiohttp
from rich.console import Console
from rich.table import Table
from base_client import BaseAsyncClient, config
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
logger = logging.getLogger("link_gap_finder")
console = Console()
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
AHREFS_BASE = "https://api.ahrefs.com/v3"
# Source category detection patterns
SOURCE_CATEGORY_PATTERNS: dict[str, list[str]] = {
"news": [
"news", "press", "media", "journal", "herald", "times", "post",
"gazette", "tribune", "daily", "chosun", "donga", "joongang",
"hani", "khan", "yna", "yonhap", "reuters", "bloomberg",
"techcrunch", "verge", "wired", "arstechnica", "bbc", "cnn",
],
"blog": [
"blog", "wordpress", "medium.com", "tistory.com", "brunch.co.kr",
"blog.naver.com", "tumblr", "blogger", "substack", "ghost.io",
"velog.io", "dev.to",
],
"forum": [
"forum", "community", "discuss", "reddit.com", "quora.com",
"stackexchange", "stackoverflow", "cafe.naver.com", "dcinside",
"fmkorea", "clien", "ppomppu", "theqoo", "ruliweb",
],
"directory": [
"directory", "listing", "yellowpages", "yelp", "bbb.org",
"clutch.co", "g2.com", "capterra", "trustpilot", "glassdoor",
"dmoz", "aboutus", "hotfrog", "manta", "superpages",
],
"edu_gov": [
".edu", ".gov", ".ac.kr", ".go.kr", ".or.kr",
],
"social": [
"facebook.com", "twitter.com", "x.com", "linkedin.com",
"instagram.com", "youtube.com", "pinterest.com", "tiktok.com",
],
"korean_platform": [
"naver.com", "daum.net", "kakao.com", "tistory.com",
"brunch.co.kr", "zum.com", "nate.com",
],
}
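# Note: matching is substring-based and the first category in insertion
# order wins, so e.g. "myblog.wordpress.com" is tagged "news" (it contains
# "press") before the "blog" patterns are ever checked.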
# ---------------------------------------------------------------------------
# Dataclasses
# ---------------------------------------------------------------------------
@dataclass
class LinkOpportunity:
"""A single link building opportunity from gap analysis."""
domain: str
dr: float = 0.0
traffic: int = 0
linked_competitors: list[str] = field(default_factory=list)
competitor_count: int = 0
not_linked_target: bool = True
category: str = "other"
feasibility_score: float = 0.0
impact_score: float = 0.0
overall_score: float = 0.0
backlinks_to_competitors: int = 0
country: str = ""
top_anchor: str = ""
@dataclass
class GapSummary:
"""Summary statistics for the gap analysis."""
total_opportunities: int = 0
avg_dr: float = 0.0
high_dr_count: int = 0
category_breakdown: dict[str, int] = field(default_factory=dict)
top_countries: list[dict[str, Any]] = field(default_factory=list)
total_competitor_refdomains: dict[str, int] = field(default_factory=dict)
target_refdomains_count: int = 0
@dataclass
class LinkGapResult:
"""Complete link gap analysis result."""
target_url: str
target_domain: str = ""
competitor_urls: list[str] = field(default_factory=list)
competitor_domains: list[str] = field(default_factory=list)
target_dr: float = 0.0
opportunities: list[LinkOpportunity] = field(default_factory=list)
summary: GapSummary | None = None
top_opportunities: list[LinkOpportunity] = field(default_factory=list)
issues: list[dict[str, str]] = field(default_factory=list)
recommendations: list[str] = field(default_factory=list)
timestamp: str = ""
# ---------------------------------------------------------------------------
# LinkGapFinder
# ---------------------------------------------------------------------------
class LinkGapFinder(BaseAsyncClient):
"""Find link building opportunities by analyzing competitor backlink gaps."""
def __init__(self, **kwargs):
super().__init__(max_concurrent=5, requests_per_second=2.0, **kwargs)
self.session: aiohttp.ClientSession | None = None
# -- Ahrefs MCP helper ---------------------------------------------------
async def _call_ahrefs(
self, endpoint: str, params: dict[str, Any]
) -> dict[str, Any]:
"""
Call Ahrefs API endpoint.
In MCP context this calls mcp__ahrefs__<endpoint>.
For standalone use, falls back to REST API with token.
"""
        # The REST fallback is only usable when a caller has opened an
        # aiohttp.ClientSession on this instance; the token is resolved
        # lazily so MCP usage never requires AHREFS_API_TOKEN.
        if self.session is not None:
            api_token = config.get_required("AHREFS_API_TOKEN")
            url = f"{AHREFS_BASE}/{endpoint}"
            headers = {"Authorization": f"Bearer {api_token}"}
            async with self.session.get(url, headers=headers, params=params) as resp:
                resp.raise_for_status()
                return await resp.json()
        logger.warning(
            f"Ahrefs call to '{endpoint}' - use MCP tool "
            f"mcp__ahrefs__{endpoint.replace('-', '_')} in Claude Desktop"
        )
        return {"endpoint": endpoint, "params": params, "data": [], "note": "mcp_stub"}
# -- Core methods --------------------------------------------------------
async def get_referring_domains(
self, url: str, limit: int = 1000
) -> list[dict[str, Any]]:
"""Fetch referring domains for a given URL/domain."""
target = urlparse(url).netloc or url
result = await self._call_ahrefs(
"site-explorer-referring-domains",
{"target": target, "mode": "domain", "limit": limit, "order_by": "domain_rating:desc"},
)
domains = result.get("data", result.get("refdomains", []))
if isinstance(domains, dict):
domains = domains.get("refdomains", [])
return domains if isinstance(domains, list) else []
async def get_domain_rating(self, url: str) -> float:
"""Fetch Domain Rating for a URL."""
target = urlparse(url).netloc or url
result = await self._call_ahrefs(
"site-explorer-domain-rating",
{"target": target},
)
        data = result.get("data", result) if isinstance(result, dict) else {}
        if not isinstance(data, dict):
            # Stubbed MCP responses return "data" as a list; treat as unknown DR.
            return 0.0
        return float(data.get("domain_rating", 0.0) or 0.0)
async def get_domain_metrics(self, url: str) -> dict[str, Any]:
"""Fetch comprehensive domain metrics."""
target = urlparse(url).netloc or url
result = await self._call_ahrefs(
"site-explorer-backlinks-stats",
{"target": target, "mode": "domain"},
)
        data = result.get("data", result) if isinstance(result, dict) else {}
        if not isinstance(data, dict):
            data = {}
return {
"total_backlinks": data.get("live", 0),
"referring_domains": data.get("live_refdomains", 0),
"dofollow": data.get("live_dofollow", 0),
}
def find_gaps(
self,
target_domains: set[str],
competitor_domain_maps: dict[str, set[str]],
) -> list[dict[str, Any]]:
"""
Find domains linking to competitors but not to the target.
Returns a list of gap domains with metadata about which
competitors they link to.
"""
# Collect all competitor referring domains
all_competitor_domains: dict[str, list[str]] = {}
for comp_name, comp_domains in competitor_domain_maps.items():
for domain in comp_domains:
domain_lower = domain.lower()
if domain_lower not in all_competitor_domains:
all_competitor_domains[domain_lower] = []
all_competitor_domains[domain_lower].append(comp_name)
# Find gaps: in competitor set but not in target set
target_set_lower = {d.lower() for d in target_domains}
gaps = []
for domain, linked_comps in all_competitor_domains.items():
if domain not in target_set_lower:
gaps.append({
"domain": domain,
"linked_competitors": linked_comps,
"competitor_count": len(set(linked_comps)),
})
# Sort by number of competitors linking (more = higher priority)
gaps.sort(key=lambda g: g["competitor_count"], reverse=True)
return gaps
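    # Example: target refdomains {a.com}; comp1 links {a.com, b.com};
    # comp2 links {b.com, c.com}. Gaps: b.com (competitor_count=2,
    # sorted first) and c.com (competitor_count=1).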
def score_opportunities(
self,
gaps: list[dict[str, Any]],
refdomains_data: dict[str, list[dict[str, Any]]],
total_competitors: int,
) -> list[LinkOpportunity]:
"""
Score gap opportunities by DR, traffic, relevance, and feasibility.
Scoring factors:
- DR weight: Higher DR = more impactful link
- Competitor overlap: More competitors linking = easier to acquire
- Category bonus: Editorial/news links valued higher
- Traffic bonus: Higher traffic domains valued more
"""
# Build a lookup of domain metadata from competitor refdomains
domain_metadata: dict[str, dict[str, Any]] = {}
for comp_url, domains in refdomains_data.items():
for rd in domains:
d = rd.get("domain", rd.get("domain_from", "")).lower()
if d and d not in domain_metadata:
domain_metadata[d] = {
"dr": rd.get("domain_rating", rd.get("dr", 0)),
"traffic": rd.get("organic_traffic", rd.get("traffic", 0)),
"backlinks": rd.get("backlinks", 0),
"country": rd.get("country", ""),
}
opportunities = []
for gap in gaps:
domain = gap["domain"]
meta = domain_metadata.get(domain, {})
dr = meta.get("dr", 0)
traffic = meta.get("traffic", 0)
comp_count = gap["competitor_count"]
# Category detection
category = self._detect_category(domain)
# Feasibility score (0-100)
# Higher if: more competitors link (social proof), blog/forum (easier outreach)
feasibility = min(100, (
(comp_count / max(total_competitors, 1)) * 40 # Competitor overlap
+ (30 if category in ("blog", "forum", "directory") else 10) # Category ease
+ (20 if dr < 60 else 5) # Lower DR = easier to get link from
+ (10 if traffic > 0 else 0) # Active site bonus
))
# Impact score (0-100)
# Higher if: high DR, high traffic, editorial/news
impact = min(100, (
min(dr, 100) * 0.4 # DR weight (40%)
+ min(traffic / 1000, 30) # Traffic weight (up to 30)
+ (20 if category in ("news", "edu_gov") else 5) # Authority bonus
+ (comp_count / max(total_competitors, 1)) * 10 # Validation
))
# Overall score = weighted average
overall = round(feasibility * 0.4 + impact * 0.6, 1)
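            # Worked example (hypothetical): a DR-70 news domain with
            # 5,000 traffic, linked by 2 of 3 competitors:
            #   feasibility = (2/3)*40 + 10 + 5 + 10     = 51.7
            #   impact      = 70*0.4 + 5 + 20 + (2/3)*10 = 59.7
            #   overall     = 0.4*51.7 + 0.6*59.7        = 56.5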
opp = LinkOpportunity(
domain=domain,
dr=dr,
traffic=traffic,
linked_competitors=gap["linked_competitors"],
competitor_count=comp_count,
not_linked_target=True,
category=category,
feasibility_score=round(feasibility, 1),
impact_score=round(impact, 1),
overall_score=overall,
backlinks_to_competitors=meta.get("backlinks", 0),
country=meta.get("country", ""),
)
opportunities.append(opp)
# Sort by overall score descending
opportunities.sort(key=lambda o: o.overall_score, reverse=True)
return opportunities
def categorize_sources(
self, opportunities: list[LinkOpportunity]
) -> dict[str, list[LinkOpportunity]]:
"""Group opportunities by source category."""
categorized: dict[str, list[LinkOpportunity]] = {}
for opp in opportunities:
cat = opp.category
if cat not in categorized:
categorized[cat] = []
categorized[cat].append(opp)
return categorized
# -- Orchestration -------------------------------------------------------
async def analyze(
self,
target_url: str,
competitor_urls: list[str],
min_dr: float = 0,
country_filter: str = "",
limit: int = 1000,
) -> LinkGapResult:
"""Orchestrate full link gap analysis."""
target_domain = urlparse(target_url).netloc or target_url
comp_domains = [urlparse(c).netloc or c for c in competitor_urls]
logger.info(f"Starting link gap analysis: {target_domain} vs {comp_domains}")
result = LinkGapResult(
target_url=target_url,
target_domain=target_domain,
competitor_urls=competitor_urls,
competitor_domains=comp_domains,
timestamp=datetime.now().isoformat(),
)
# Phase 1: Fetch target DR and referring domains
logger.info("Phase 1: Fetching target data...")
target_dr_task = self.get_domain_rating(target_url)
target_rd_task = self.get_referring_domains(target_url, limit=limit)
target_dr, target_refdomains = await asyncio.gather(
target_dr_task, target_rd_task, return_exceptions=True,
)
result.target_dr = target_dr if isinstance(target_dr, (int, float)) else 0
target_rd_list = target_refdomains if isinstance(target_refdomains, list) else []
target_domain_set = {
rd.get("domain", rd.get("domain_from", "")).lower()
for rd in target_rd_list
if rd.get("domain", rd.get("domain_from", ""))
}
        # Phase 2: Fetch competitor referring domains (parallel). Wrapping
        # the coroutines in tasks starts them immediately; the loop below
        # only collects results as they complete.
        logger.info("Phase 2: Fetching competitor data...")
        comp_rd_tasks = {
            comp_url: asyncio.create_task(self.get_referring_domains(comp_url, limit=limit))
            for comp_url in competitor_urls
        }
comp_results = {}
for comp_url, task in comp_rd_tasks.items():
try:
comp_rd = await task
comp_results[comp_url] = comp_rd if isinstance(comp_rd, list) else []
except Exception as e:
logger.error(f"Failed to fetch refdomains for {comp_url}: {e}")
comp_results[comp_url] = []
# Build competitor domain maps
competitor_domain_maps: dict[str, set[str]] = {}
for comp_url, rd_list in comp_results.items():
comp_domain = urlparse(comp_url).netloc or comp_url
competitor_domain_maps[comp_domain] = {
rd.get("domain", rd.get("domain_from", "")).lower()
for rd in rd_list
if rd.get("domain", rd.get("domain_from", ""))
}
# Phase 3: Find gaps
logger.info("Phase 3: Finding link gaps...")
raw_gaps = self.find_gaps(target_domain_set, competitor_domain_maps)
logger.info(f"Found {len(raw_gaps)} gap domains")
# Phase 4: Score opportunities
logger.info("Phase 4: Scoring opportunities...")
opportunities = self.score_opportunities(
raw_gaps, comp_results, len(competitor_urls)
)
# Apply filters
if min_dr > 0:
opportunities = [o for o in opportunities if o.dr >= min_dr]
if country_filter:
country_lower = country_filter.lower()
opportunities = [
o for o in opportunities
if o.country.lower() == country_lower or not o.country
]
result.opportunities = opportunities
result.top_opportunities = opportunities[:50]
# Phase 5: Build summary
logger.info("Phase 5: Building summary...")
result.summary = self._build_summary(
opportunities, comp_results, len(target_rd_list)
)
# Phase 6: Generate recommendations
self._generate_issues(result)
self._generate_recommendations(result)
logger.info(f"Link gap analysis complete: {len(opportunities)} opportunities found")
return result
# -- Helpers -------------------------------------------------------------
@staticmethod
def _detect_category(domain: str) -> str:
"""Detect the category of a domain based on patterns."""
domain_lower = domain.lower()
for category, patterns in SOURCE_CATEGORY_PATTERNS.items():
for pattern in patterns:
if pattern in domain_lower:
return category
# Fallback heuristics
if domain_lower.endswith((".edu", ".ac.kr", ".gov", ".go.kr")):
return "edu_gov"
return "other"
def _build_summary(
self,
opportunities: list[LinkOpportunity],
comp_results: dict[str, list],
target_rd_count: int,
) -> GapSummary:
"""Build summary statistics from opportunities."""
summary = GapSummary()
summary.total_opportunities = len(opportunities)
summary.target_refdomains_count = target_rd_count
if opportunities:
dr_values = [o.dr for o in opportunities if o.dr > 0]
summary.avg_dr = round(sum(dr_values) / max(len(dr_values), 1), 1)
summary.high_dr_count = sum(1 for o in opportunities if o.dr >= 50)
# Category breakdown
cat_counts: dict[str, int] = {}
country_counts: dict[str, int] = {}
for opp in opportunities:
cat_counts[opp.category] = cat_counts.get(opp.category, 0) + 1
if opp.country:
country_counts[opp.country] = country_counts.get(opp.country, 0) + 1
summary.category_breakdown = dict(
sorted(cat_counts.items(), key=lambda x: x[1], reverse=True)
)
summary.top_countries = sorted(
[{"country": k, "count": v} for k, v in country_counts.items()],
key=lambda x: x["count"], reverse=True,
)[:10]
# Competitor refdomains counts
for comp_url, rd_list in comp_results.items():
comp_domain = urlparse(comp_url).netloc or comp_url
summary.total_competitor_refdomains[comp_domain] = len(rd_list)
return summary
def _generate_issues(self, result: LinkGapResult) -> None:
"""Generate issues based on gap analysis."""
issues = []
if result.summary:
# Large gap warning
if result.summary.total_opportunities > 500:
issues.append({
"type": "warning",
"category": "link_gap",
"message": (
f"Large link gap: {result.summary.total_opportunities} domains "
"link to competitors but not to you"
),
})
# High-DR gap
if result.summary.high_dr_count > 50:
issues.append({
"type": "error",
"category": "authority_gap",
"message": (
f"{result.summary.high_dr_count} high-authority domains (DR 50+) "
"link to competitors but not to you"
),
})
# Category-specific gaps
news_gap = result.summary.category_breakdown.get("news", 0)
if news_gap > 20:
issues.append({
"type": "warning",
"category": "pr_gap",
"message": f"{news_gap} news/media domains link to competitors - consider digital PR",
})
edu_gap = result.summary.category_breakdown.get("edu_gov", 0)
if edu_gap > 5:
issues.append({
"type": "info",
"category": "edu_gov_gap",
"message": f"{edu_gap} .edu/.gov domains link to competitors - high-authority opportunity",
})
result.issues = issues
def _generate_recommendations(self, result: LinkGapResult) -> None:
"""Generate actionable recommendations."""
recs = []
if not result.opportunities:
recs.append("No significant link gaps found. Consider expanding competitor list.")
result.recommendations = recs
return
# Top opportunities by category
categorized = self.categorize_sources(result.top_opportunities[:100])
if "news" in categorized:
news_count = len(categorized["news"])
top_news = [o.domain for o in categorized["news"][:3]]
recs.append(
f"Pursue {news_count} news/media link opportunities. "
f"Top targets: {', '.join(top_news)}. "
"Strategy: create newsworthy content, press releases, expert commentary."
)
if "blog" in categorized:
blog_count = len(categorized["blog"])
recs.append(
f"Target {blog_count} blog/content site opportunities via guest posting, "
"collaborative content, and expert interviews."
)
if "directory" in categorized:
dir_count = len(categorized["directory"])
recs.append(
f"Submit to {dir_count} relevant directories and listing sites. "
"Low effort, moderate impact for local SEO signals."
)
if "forum" in categorized:
forum_count = len(categorized["forum"])
recs.append(
f"Engage in {forum_count} forum/community sites with helpful answers "
"and resource sharing. Build presence before linking."
)
if "korean_platform" in categorized:
kr_count = len(categorized["korean_platform"])
recs.append(
f"Build presence on {kr_count} Korean platforms (Naver, Tistory, Brunch). "
"Critical for Korean SERP visibility."
)
if "edu_gov" in categorized:
eg_count = len(categorized["edu_gov"])
recs.append(
f"Target {eg_count} .edu/.gov link opportunities through scholarship "
"programs, research partnerships, or government resource contributions."
)
# Multi-competitor overlap
multi_comp = [o for o in result.top_opportunities if o.competitor_count >= 2]
if multi_comp:
recs.append(
f"{len(multi_comp)} domains link to multiple competitors but not to you. "
"These are high-priority targets as they validate industry relevance."
)
# Quick wins: high feasibility, moderate impact
quick_wins = [
o for o in result.opportunities[:100]
if o.feasibility_score >= 60 and o.impact_score >= 30
]
if quick_wins:
recs.append(
f"Prioritize {len(quick_wins)} quick-win opportunities with high "
"feasibility and moderate impact for fastest link acquisition."
)
result.recommendations = recs
# ---------------------------------------------------------------------------
# Output Formatting
# ---------------------------------------------------------------------------
def format_rich_output(result: LinkGapResult) -> None:
"""Display gap analysis results using Rich tables."""
console.print(f"\n[bold cyan]Link Gap Analysis: {result.target_domain}[/bold cyan]")
console.print(f"[dim]vs {', '.join(result.competitor_domains)}[/dim]")
console.print(f"[dim]Timestamp: {result.timestamp}[/dim]\n")
# Summary
if result.summary:
summary_table = Table(title="Summary", show_header=True, header_style="bold magenta")
summary_table.add_column("Metric", style="cyan")
summary_table.add_column("Value", style="green")
summary_table.add_row("Target DR", str(result.target_dr))
summary_table.add_row("Target Referring Domains", str(result.summary.target_refdomains_count))
summary_table.add_row("Total Gap Opportunities", str(result.summary.total_opportunities))
summary_table.add_row("Avg Opportunity DR", str(result.summary.avg_dr))
summary_table.add_row("High-DR Opportunities (50+)", str(result.summary.high_dr_count))
for comp, count in result.summary.total_competitor_refdomains.items():
summary_table.add_row(f" {comp} Refdomains", str(count))
console.print(summary_table)
# Category breakdown
if result.summary and result.summary.category_breakdown:
cat_table = Table(title="\nCategory Breakdown", show_header=True, header_style="bold magenta")
cat_table.add_column("Category", style="cyan")
cat_table.add_column("Count", style="green")
for cat, count in result.summary.category_breakdown.items():
cat_table.add_row(cat, str(count))
console.print(cat_table)
# Top opportunities
if result.top_opportunities:
opp_table = Table(
title=f"\nTop Opportunities (showing {min(25, len(result.top_opportunities))})",
show_header=True,
header_style="bold magenta",
)
opp_table.add_column("Domain", style="cyan", max_width=35)
opp_table.add_column("DR", style="green", justify="right")
opp_table.add_column("Category", style="yellow")
opp_table.add_column("Comps", justify="right")
opp_table.add_column("Score", style="bold green", justify="right")
opp_table.add_column("Feasibility", justify="right")
opp_table.add_column("Impact", justify="right")
for opp in result.top_opportunities[:25]:
opp_table.add_row(
opp.domain[:35],
str(int(opp.dr)),
opp.category,
str(opp.competitor_count),
f"{opp.overall_score:.1f}",
f"{opp.feasibility_score:.0f}",
f"{opp.impact_score:.0f}",
)
console.print(opp_table)
# Issues
if result.issues:
console.print("\n[bold red]Issues:[/bold red]")
for issue in result.issues:
icon_map = {"error": "[red]ERROR[/red]", "warning": "[yellow]WARN[/yellow]", "info": "[blue]INFO[/blue]"}
icon = icon_map.get(issue["type"], "[dim]INFO[/dim]")
console.print(f" {icon} [{issue['category']}] {issue['message']}")
# Recommendations
if result.recommendations:
console.print("\n[bold green]Recommendations:[/bold green]")
for i, rec in enumerate(result.recommendations, 1):
console.print(f" {i}. {rec}")
console.print()
def result_to_dict(result: LinkGapResult) -> dict[str, Any]:
"""Convert gap result to JSON-serializable dict."""
return {
"target_url": result.target_url,
"target_domain": result.target_domain,
"target_dr": result.target_dr,
"competitor_urls": result.competitor_urls,
"competitor_domains": result.competitor_domains,
"summary": asdict(result.summary) if result.summary else None,
"opportunities": [asdict(o) for o in result.opportunities],
"top_opportunities": [asdict(o) for o in result.top_opportunities],
"issues": result.issues,
"recommendations": result.recommendations,
"timestamp": result.timestamp,
}
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def parse_args() -> argparse.Namespace:
"""Parse command-line arguments."""
parser = argparse.ArgumentParser(
description="Link Gap Finder - Identify link building opportunities vs competitors",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --json
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --competitor https://comp2.com --min-dr 30 --json
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --country kr --output gap_report.json
""",
)
parser.add_argument("--target", required=True, help="Target URL or domain")
parser.add_argument(
"--competitor", action="append", required=True,
help="Competitor URL or domain (can be repeated)",
)
parser.add_argument(
"--min-dr", type=float, default=0,
help="Minimum DR filter for opportunities (default: 0)",
)
parser.add_argument(
"--country", default="",
help="Filter by country code (e.g., kr, us, jp)",
)
parser.add_argument(
"--limit", type=int, default=1000,
help="Max referring domains to fetch per site (default: 1000)",
)
parser.add_argument("--json", action="store_true", help="Output as JSON")
parser.add_argument("--output", "-o", help="Save output to file")
return parser.parse_args()
async def main() -> None:
"""Main entry point."""
args = parse_args()
finder = LinkGapFinder()
try:
result = await finder.analyze(
target_url=args.target,
competitor_urls=args.competitor,
min_dr=args.min_dr,
country_filter=args.country,
limit=args.limit,
)
if args.json or args.output:
output_data = result_to_dict(result)
json_str = json.dumps(output_data, indent=2, ensure_ascii=False)
if args.output:
with open(args.output, "w", encoding="utf-8") as f:
f.write(json_str)
logger.info(f"Report saved to {args.output}")
if args.json:
print(json_str)
else:
format_rich_output(result)
finder.print_stats()
except KeyboardInterrupt:
logger.warning("Analysis interrupted by user")
sys.exit(1)
except Exception as e:
logger.error(f"Analysis failed: {e}")
if args.json:
print(json.dumps({"error": str(e)}, indent=2))
sys.exit(1)
if __name__ == "__main__":
asyncio.run(main())
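
The gap and scoring logic can also be exercised offline with toy data, without any Ahrefs access (a sketch; the domains and metrics below are hypothetical):

from link_gap_finder import LinkGapFinder

finder = LinkGapFinder()
gaps = finder.find_gaps(
    target_domains={"a.com"},
    competitor_domain_maps={
        "comp1.com": {"a.com", "b.com"},
        "comp2.com": {"b.com", "c.com"},
    },
)
refdomains = {
    "comp1.com": [{"domain": "b.com", "domain_rating": 55, "organic_traffic": 4000}],
    "comp2.com": [{"domain": "c.com", "domain_rating": 20}],
}
for opp in finder.score_opportunities(gaps, refdomains, total_competitors=2):
    print(opp.domain, opp.category, opp.overall_score)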


@@ -0,0 +1,8 @@
# 22-seo-link-building dependencies
requests>=2.31.0
aiohttp>=3.9.0
pandas>=2.1.0
tenacity>=8.2.0
tqdm>=4.66.0
python-dotenv>=1.0.0
rich>=13.7.0