Add SEO skills 19-28, 31-32 with full Python implementations

12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 12:05:59 +09:00
parent 159f7ec3f7
commit a3ff965b87
125 changed files with 25948 additions and 173 deletions
--- a/custom-skills/19-seo-keyword-strategy/code/scripts/keyword_researcher.py
+++ b/custom-skills/19-seo-keyword-strategy/code/scripts/keyword_researcher.py
@@ -0,0 +1,656 @@
+"""
+Keyword Researcher - Seed keyword expansion, intent classification, and topic clustering
+========================================================================================
+Purpose: Expand seed keywords via Ahrefs APIs, classify search intent,
+         cluster topics, and support Korean market keyword discovery.
+Python: 3.10+
+"""
+
+import argparse
+import json
+import logging
+import re
+import subprocess
+import sys
+from dataclasses import dataclass, field, asdict
+from datetime import datetime
+from typing import Optional
+
+# ---------------------------------------------------------------------------
+# Logging
+# ---------------------------------------------------------------------------
+# Configure the root logger as soon as this module is loaded: INFO level,
+# one "timestamp - level - message" line per record.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+# Named logger shared by the functions and classes in this script.
+logger = logging.getLogger("keyword_researcher")
+
+# ---------------------------------------------------------------------------
+# Constants - Korean suffix expansion
+# ---------------------------------------------------------------------------
+# Korean modifiers appended to a seed keyword ("<seed> <suffix>") to build
+# extra keyword variations. English glosses are given next to each entry.
+KOREAN_SUFFIXES: list[str] = [
+    "추천",  # recommendation
+    "가격",  # price
+    "후기",  # review / testimonial
+    "잘하는곳",  # "place that does it well"
+    "부작용",  # side effects
+    "전후",  # before and after
+    "비용",  # cost
+    "추천 병원",  # recommended hospital / clinic
+    "후기 블로그",  # review blog
+    "방법",  # method / how-to
+    "종류",  # types / kinds
+    "비교",  # comparison
+    "효과",  # effect
+    "주의사항",  # precautions
+    "장단점",  # pros and cons
+]
+
+# ---------------------------------------------------------------------------
+# Intent classification patterns
+# ---------------------------------------------------------------------------
+# Regular expressions (mixed Korean / English) used to label a keyword's
+# search intent. Each intent maps to a list of alternation patterns; a keyword
+# gets the first intent, in the insertion order below, for which any pattern
+# matches, so the order of the keys is the classification priority.
+#
+# NOTE(review): the Latin alternatives are unanchored substrings, so e.g.
+# "fee" also matches inside "coffee" and "deal" inside "ideal". Tightening
+# them with word boundaries would also stop matching inflected forms such as
+# "shopping" or "reviews", so that trade-off is left for a deliberate decision.
+INTENT_PATTERNS: dict[str, list[str]] = {
+    "transactional": [
+        r"구매|구입|주문|buy|order|purchase|shop|deal|discount|coupon|할인|쿠폰",
+        r"예약|booking|reserve|sign\s?up|register|등록|신청",
+    ],
+    "commercial": [
+        r"가격|비용|얼마|price|cost|pricing|fee|요금",
+        r"추천|best|top\s?\d|review|비교|compare|vs|versus|후기|리뷰|평점|평가",
+        r"잘하는곳|잘하는|맛집|업체|병원|추천\s?병원",
+    ],
+    "navigational": [
+        # URL-like keywords: a "www." / "http" prefix, or a domain suffix
+        # anywhere in the keyword. The suffixes used to sit inside the
+        # "^"-anchored group, which meant "naver.com" could never match.
+        r"^(?:www\.|http)|\.(?:com|co\.kr|net)\b",
+        r"공식|official|login|로그인|홈페이지|사이트|website",
+        r"고객센터|contact|support|customer\s?service",
+    ],
+    "informational": [
+        r"방법|how\s?to|what\s?is|why|when|where|who|which",
+        r"뜻|의미|정의|definition|meaning|guide|tutorial",
+        r"효과|부작용|증상|원인|차이|종류|type|cause|symptom|effect",
+        r"전후|before\s?and\s?after|결과|result",
+    ],
+}
+
+# ---------------------------------------------------------------------------
+# Dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class KeywordEntry:
+    """One keyword together with its search metrics and labels.
+
+    Only ``keyword`` is required. The numeric metrics default to zero,
+    ``intent`` defaults to ``"informational"``, ``cluster`` and ``source``
+    default to empty strings, and ``country_volumes`` starts as an empty
+    country-code -> volume mapping.
+    """
+
+    keyword: str
+    volume: int = 0
+    kd: float = 0.0
+    cpc: float = 0.0
+    intent: str = "informational"
+    cluster: str = ""
+    source: str = ""
+    country_volumes: dict[str, int] = field(default_factory=dict)
+
+    def to_dict(self) -> dict:
+        """Return the entry as a plain dict, leaving out an empty ``country_volumes``."""
+        payload = asdict(self)
+        if payload["country_volumes"]:
+            return payload
+        # Nothing was collected per country, so keep the serialized form compact.
+        payload.pop("country_volumes")
+        return payload
+
+
+@dataclass
+class KeywordCluster:
+    """A topic label plus the keywords grouped under it and their rolled-up metrics."""
+
+    topic: str
+    keywords: list[str] = field(default_factory=list)
+    total_volume: int = 0
+    avg_kd: float = 0.0
+    primary_intent: str = "informational"
+
+    def to_dict(self) -> dict:
+        """Return the cluster as a plain dict holding a copy of the keyword list."""
+        return {
+            "topic": self.topic,
+            "keywords": list(self.keywords),
+            "total_volume": self.total_volume,
+            "avg_kd": self.avg_kd,
+            "primary_intent": self.primary_intent,
+        }
+
+
+@dataclass
+class ResearchResult:
+    """Everything produced by one research run for a seed keyword.
+
+    Holds the seed and country that were analysed, the keyword and volume
+    totals, the clusters and keyword entries, and a timestamp string.
+    """
+
+    seed_keyword: str
+    country: str
+    total_keywords: int = 0
+    total_volume: int = 0
+    clusters: list[KeywordCluster] = field(default_factory=list)
+    keywords: list[KeywordEntry] = field(default_factory=list)
+    timestamp: str = ""
+
+    def to_dict(self) -> dict:
+        """Return a plain dict, serializing clusters and keywords via their own ``to_dict``."""
+        cluster_dicts = [cluster.to_dict() for cluster in self.clusters]
+        keyword_dicts = [entry.to_dict() for entry in self.keywords]
+        return dict(
+            seed_keyword=self.seed_keyword,
+            country=self.country,
+            total_keywords=self.total_keywords,
+            total_volume=self.total_volume,
+            clusters=cluster_dicts,
+            keywords=keyword_dicts,
+            timestamp=self.timestamp,
+        )
+
+
+# ---------------------------------------------------------------------------
+# MCP Helper - calls Ahrefs MCP tools via subprocess
+# ---------------------------------------------------------------------------
+
+
+def call_mcp_tool(tool_name: str, params: dict) -> dict:
+    """
+    Call an Ahrefs MCP tool through the Claude CLI and return its JSON reply.
+
+    The ``claude`` executable is run as a subprocess (120 second timeout) with
+    a prompt asking it to invoke ``mcp__claude_ai_Ahrefs__<tool_name>`` with
+    ``params`` and to answer with the raw JSON only.
+
+    Args:
+        tool_name: Ahrefs tool name without the ``mcp__claude_ai_Ahrefs__`` prefix.
+        params: Parameters for the tool; embedded in the prompt as JSON.
+
+    Returns:
+        Always a dict. On success, the decoded reply. On any failure, a dict
+        with empty ``"keywords"`` and ``"items"`` lists, plus ``"error"``
+        (non-zero exit, timeout, launch failure) or ``"raw"`` (stdout that is
+        not usable JSON) when available. This function does not raise for
+        those failures.
+    """
+    logger.info(f"Calling MCP tool: {tool_name} with params: {json.dumps(params, ensure_ascii=False)}")
+
+    cmd = [
+        "claude",
+        "--print",
+        "--output-format", "json",
+        "-p",
+        f"Call the tool mcp__claude_ai_Ahrefs__{tool_name} with these parameters: {json.dumps(params, ensure_ascii=False)}. Return ONLY the raw JSON result.",
+    ]
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+    except subprocess.TimeoutExpired:
+        logger.error(f"MCP tool {tool_name} timed out")
+        return {"error": "timeout", "keywords": [], "items": []}
+    except FileNotFoundError:
+        logger.warning("Claude CLI not found - returning empty result for standalone testing")
+        return {"keywords": [], "items": []}
+    except OSError as exc:
+        # E.g. the executable exists but cannot be run; stay best-effort.
+        logger.warning(f"Could not launch Claude CLI for MCP tool {tool_name}: {exc}")
+        return {"error": str(exc), "keywords": [], "items": []}
+
+    if result.returncode != 0:
+        logger.warning(f"MCP tool {tool_name} returned non-zero exit code: {result.returncode}")
+        logger.debug(f"stderr: {result.stderr}")
+        return {"error": result.stderr, "keywords": [], "items": []}
+
+    try:
+        parsed = json.loads(result.stdout)
+    except json.JSONDecodeError:
+        logger.warning(f"MCP tool {tool_name} returned output that is not valid JSON")
+        return {"raw": result.stdout, "keywords": [], "items": []}
+
+    # With "--output-format json" the CLI is expected to wrap the reply in an
+    # envelope whose "result" field carries the reply text (see the CLI
+    # reference). When that text is itself JSON, hand the caller the tool
+    # payload instead of the envelope; otherwise leave the envelope untouched.
+    if isinstance(parsed, dict) and isinstance(parsed.get("result"), str):
+        try:
+            inner = json.loads(parsed["result"])
+        except json.JSONDecodeError:
+            inner = None
+        if isinstance(inner, (dict, list)):
+            parsed = inner
+
+    # Callers use dict access on the return value, so never hand back a bare
+    # list or scalar.
+    if isinstance(parsed, list):
+        return {"keywords": parsed, "items": parsed}
+    if not isinstance(parsed, dict):
+        return {"raw": result.stdout, "keywords": [], "items": []}
+    return parsed
+
+
+# ---------------------------------------------------------------------------
+# KeywordResearcher
+# ---------------------------------------------------------------------------
+
+
+class KeywordResearcher:
+    """Expand seed keywords, classify intent, and cluster topics.
+
+    An instance stores the target ``country``, two feature switches
+    (``korean_suffixes`` and ``compare_global``) and a ``_seen`` set that
+    de-duplicates keywords across the expansion steps of one analysis.
+    """
+
+    # (MCP tool name, source label, row limit) for each expansion endpoint,
+    # listed in the order they are queried.
+    _EXPANSION_ENDPOINTS = (
+        ("keywords-explorer-matching-terms", "matching-terms", 100),
+        ("keywords-explorer-related-terms", "related-terms", 100),
+        ("keywords-explorer-search-suggestions", "search-suggestions", 50),
+    )
+
+    def __init__(self, country: str = "kr", korean_suffixes: bool = False, compare_global: bool = False):
+        """Store the run configuration.
+
+        Args:
+            country: Country code passed to the keyword tools.
+            korean_suffixes: When true, ``analyze`` also runs the Korean
+                suffix expansion.
+            compare_global: When true, ``analyze`` fetches per-country
+                volumes for the highest-volume keywords.
+        """
+        self.country = country
+        self.korean_suffixes = korean_suffixes
+        self.compare_global = compare_global
+        self._seen: set[str] = set()
+
+    # ---- Keyword expansion via Ahrefs MCP ----
+
+    def expand_keywords(self, seed: str) -> list[KeywordEntry]:
+        """
+        Expand a seed keyword using the matching-terms, related-terms and
+        search-suggestions tools, then make sure the seed itself is included.
+
+        Keywords already recorded in ``self._seen`` are skipped. The seed is
+        placed first in the returned list; when its overview lookup yields no
+        usable row it is kept as a zero-metric entry rather than dropped.
+        """
+        all_keywords: list[KeywordEntry] = []
+
+        for tool_name, source, limit in self._EXPANSION_ENDPOINTS:
+            logger.info(f"Fetching {source.replace('-', ' ')} for: {seed}")
+            response = call_mcp_tool(tool_name, {
+                "keyword": seed,
+                "country": self.country,
+                "limit": limit,
+            })
+            for item in self._response_rows(response):
+                kw = self._parse_keyword_item(item, source=source)
+                if kw and kw.keyword not in self._seen:
+                    self._seen.add(kw.keyword)
+                    all_keywords.append(kw)
+
+        # Add the seed itself if no endpoint returned it.
+        if seed not in self._seen:
+            self._seen.add(seed)
+            overview = call_mcp_tool("keywords-explorer-overview", {
+                "keyword": seed,
+                "country": self.country,
+            })
+            seed_entry = self._parse_overview(overview, seed, source="seed")
+            if seed_entry is None:
+                seed_entry = self._placeholder_entry(seed, source="seed")
+            all_keywords.insert(0, seed_entry)
+
+        logger.info(f"Expanded to {len(all_keywords)} keywords from Ahrefs APIs")
+        return all_keywords
+
+    def expand_korean_suffixes(self, seed: str) -> list[KeywordEntry]:
+        """
+        Generate keyword variations by appending each entry of
+        ``KOREAN_SUFFIXES`` to the seed and looking each one up.
+
+        A variation with a positive volume is kept with its metrics. A
+        variation for which no row could be parsed is kept as a zero-volume
+        placeholder. Variations already in ``self._seen`` are skipped.
+        """
+        suffix_keywords: list[KeywordEntry] = []
+
+        for suffix in KOREAN_SUFFIXES:
+            variation = f"{seed} {suffix}"
+            if variation in self._seen:
+                continue
+
+            logger.info(f"Checking Korean suffix variation: {variation}")
+            overview = call_mcp_tool("keywords-explorer-overview", {
+                "keyword": variation,
+                "country": self.country,
+            })
+            kw = self._parse_overview(overview, variation, source="korean-suffix")
+            if kw is None:
+                # Even if no data, include as zero-volume for completeness.
+                kw = self._placeholder_entry(variation, source="korean-suffix")
+            elif kw.volume <= 0:
+                # NOTE(review): a row that comes back with zero volume is
+                # dropped while a missing row is kept as a placeholder;
+                # confirm this asymmetry is intended.
+                continue
+
+            self._seen.add(variation)
+            suffix_keywords.append(kw)
+
+        logger.info(f"Korean suffix expansion yielded {len(suffix_keywords)} variations")
+        return suffix_keywords
+
+    def get_volume_by_country(self, keyword: str) -> dict[str, int]:
+        """
+        Get the search volume breakdown by country for a keyword.
+
+        Returns a mapping of lower-cased country code to volume. Rows without
+        a country code or with a falsy volume are left out.
+        """
+        logger.info(f"Fetching volume-by-country for: {keyword}")
+        result = call_mcp_tool("keywords-explorer-volume-by-country", {
+            "keyword": keyword,
+        })
+
+        rows = result.get("countries", result.get("items", [])) if isinstance(result, dict) else []
+        if not isinstance(rows, list):
+            rows = []
+
+        volumes: dict[str, int] = {}
+        for item in rows:
+            if not isinstance(item, dict):
+                continue
+            country_code = item.get("country", item.get("code", ""))
+            volume = item.get("volume", item.get("search_volume", 0))
+            if country_code and volume:
+                volumes[str(country_code).lower()] = self._to_int(volume)
+
+        return volumes
+
+    # ---- Intent classification ----
+
+    def classify_intent(self, keyword: str) -> str:
+        """
+        Classify search intent based on keyword patterns.
+
+        ``INTENT_PATTERNS`` is walked in insertion order and the first intent
+        with a matching pattern wins, which gives the priority
+        transactional > commercial > navigational > informational.
+        Keywords matching nothing are labelled ``"informational"``.
+        """
+        keyword_lower = keyword.lower().strip()
+
+        for intent, patterns in INTENT_PATTERNS.items():
+            if any(re.search(pattern, keyword_lower, re.IGNORECASE) for pattern in patterns):
+                return intent
+
+        return "informational"
+
+    # ---- Keyword clustering ----
+
+    def cluster_keywords(
+        self,
+        keywords: list[KeywordEntry],
+        exclude_tokens: Optional[list[str]] = None,
+    ) -> list[KeywordCluster]:
+        """
+        Group keywords into topic clusters by shared whitespace-separated tokens.
+
+        Tokens of two or more characters are counted across all keywords. The
+        most frequent token becomes a cluster holding every still-unassigned
+        keyword containing it, then the next token, and so on. A cluster needs
+        at least two keywords. Whatever is left goes to an ``"(unclustered)"``
+        bucket.
+
+        Side effect: each entry's ``cluster`` attribute is set in place.
+
+        Args:
+            keywords: Entries to cluster.
+            exclude_tokens: Tokens that must not become cluster topics
+                (compared case-insensitively). Pass the seed's own tokens
+                here: they appear in almost every expanded keyword and would
+                otherwise pull everything into a single cluster. ``None``
+                excludes nothing.
+
+        Returns:
+            Clusters sorted by total volume, highest first.
+        """
+        if not keywords:
+            return []
+
+        excluded = {str(token).lower() for token in (exclude_tokens or ())}
+
+        # token -> indices of the keywords containing it. Tokens are visited
+        # in their order of appearance, so the mapping (and every tie between
+        # equally frequent tokens) is the same on every run; iterating a set
+        # of strings here would make the result depend on hash randomization.
+        token_map: dict[str, list[int]] = {}
+        for index, kw in enumerate(keywords):
+            for token in self._tokenize(kw.keyword):
+                if token not in excluded:
+                    token_map.setdefault(token, []).append(index)
+
+        # Most frequent tokens first; the stable sort keeps first-seen order
+        # among ties.
+        anchors = sorted(token_map.items(), key=lambda pair: len(pair[1]), reverse=True)
+
+        assigned: set[int] = set()
+        clusters: list[KeywordCluster] = []
+
+        for token, indices in anchors:
+            if len(indices) < 2:
+                # Sorted by frequency, so every remaining token is a singleton.
+                break
+
+            members = [i for i in indices if i not in assigned]
+            if len(members) < 2:
+                continue
+
+            clusters.append(self._build_cluster(token, [keywords[i] for i in members]))
+            for i in members:
+                assigned.add(i)
+                keywords[i].cluster = token
+
+        # Assign unclustered keywords to an "other" cluster.
+        leftovers = [i for i in range(len(keywords)) if i not in assigned]
+        if leftovers:
+            clusters.append(
+                self._build_cluster(
+                    "(unclustered)",
+                    [keywords[i] for i in leftovers],
+                    primary_intent="informational",
+                )
+            )
+            for i in leftovers:
+                keywords[i].cluster = "(unclustered)"
+
+        clusters.sort(key=lambda c: c.total_volume, reverse=True)
+
+        logger.info(f"Clustered {len(keywords)} keywords into {len(clusters)} clusters")
+        return clusters
+
+    # ---- Full analysis orchestration ----
+
+    def analyze(self, seed_keyword: str) -> ResearchResult:
+        """
+        Orchestrate a full keyword research analysis:
+        1. Expand seed via Ahrefs
+        2. Optionally expand Korean suffixes
+        3. Classify intent for all keywords
+        4. Optionally fetch volume-by-country for the ten highest-volume keywords
+        5. Cluster keywords into topics (seed tokens are not used as topics)
+        6. Compile results
+
+        Each call starts with an empty de-duplication set, so one instance
+        can analyse several seeds, or the same seed twice.
+        """
+        logger.info(f"Starting keyword research for: {seed_keyword} (country={self.country})")
+
+        # Forget keywords from an earlier run; otherwise they would be
+        # treated as duplicates and silently left out of this result.
+        self._seen = set()
+
+        # Step 1: Expand keywords
+        keywords = self.expand_keywords(seed_keyword)
+
+        # Step 2: Korean suffix expansion
+        if self.korean_suffixes:
+            keywords.extend(self.expand_korean_suffixes(seed_keyword))
+
+        # Step 3: Classify intent for all keywords
+        for kw in keywords:
+            if not kw.intent or kw.intent == "informational":
+                kw.intent = self.classify_intent(kw.keyword)
+
+        # Step 4: Volume-by-country comparison for the top-volume keywords
+        if self.compare_global and keywords:
+            top_keywords = sorted(keywords, key=lambda k: k.volume, reverse=True)[:10]
+            for kw in top_keywords:
+                kw.country_volumes = self.get_volume_by_country(kw.keyword)
+
+        # Step 5: Cluster keywords
+        clusters = self.cluster_keywords(keywords, exclude_tokens=self._tokenize(seed_keyword))
+
+        # Step 6: Compile result
+        result = ResearchResult(
+            seed_keyword=seed_keyword,
+            country=self.country,
+            total_keywords=len(keywords),
+            total_volume=sum(kw.volume for kw in keywords),
+            clusters=clusters,
+            keywords=sorted(keywords, key=lambda k: k.volume, reverse=True),
+            timestamp=datetime.now().isoformat(),
+        )
+
+        logger.info(
+            f"Research complete: {result.total_keywords} keywords, "
+            f"{len(result.clusters)} clusters, "
+            f"total volume {result.total_volume}"
+        )
+        return result
+
+    # ---- Internal helpers ----
+
+    @staticmethod
+    def _tokenize(text: str) -> list[str]:
+        """Return the distinct lower-cased words of 2+ characters, in order of appearance."""
+        words = re.split(r"\s+", text.strip().lower())
+        return list(dict.fromkeys(word for word in words if len(word) >= 2))
+
+    @staticmethod
+    def _response_rows(response) -> list:
+        """Return the row list of a tool response (``keywords`` first, then ``items``)."""
+        if isinstance(response, list):
+            return response
+        if not isinstance(response, dict):
+            return []
+        rows = response.get("keywords", response.get("items", []))
+        return rows if isinstance(rows, list) else []
+
+    @staticmethod
+    def _to_float(value) -> float:
+        """Convert a metric to float; missing or unparseable values become 0.0."""
+        if isinstance(value, str):
+            value = value.replace(",", "").strip()
+        try:
+            return float(value or 0)
+        except (TypeError, ValueError):
+            return 0.0
+
+    @classmethod
+    def _to_int(cls, value) -> int:
+        """Convert a metric to int (truncating); missing or unparseable values become 0."""
+        if isinstance(value, int):
+            return int(value)
+        try:
+            return int(cls._to_float(value))
+        except (ValueError, OverflowError):
+            # NaN / infinity cannot be represented as an int.
+            return 0
+
+    @staticmethod
+    def _build_cluster(
+        topic: str,
+        members: list[KeywordEntry],
+        primary_intent: Optional[str] = None,
+    ) -> KeywordCluster:
+        """Roll member entries up into a cluster; intent defaults to the most frequent one."""
+        if primary_intent is None:
+            intent_counts: dict[str, int] = {}
+            for member in members:
+                intent_counts[member.intent] = intent_counts.get(member.intent, 0) + 1
+            primary_intent = max(intent_counts, key=intent_counts.get) if intent_counts else "informational"
+
+        return KeywordCluster(
+            topic=topic,
+            keywords=[member.keyword for member in members],
+            total_volume=sum(member.volume for member in members),
+            avg_kd=round(sum(member.kd for member in members) / len(members), 1) if members else 0.0,
+            primary_intent=primary_intent,
+        )
+
+    def _placeholder_entry(self, keyword: str, source: str) -> KeywordEntry:
+        """Build a zero-metric entry for a keyword that has no usable data."""
+        return KeywordEntry(
+            keyword=keyword,
+            volume=0,
+            kd=0.0,
+            cpc=0.0,
+            intent=self.classify_intent(keyword),
+            source=source,
+        )
+
+    def _parse_overview(self, response, keyword: str, source: str) -> Optional[KeywordEntry]:
+        """
+        Parse a single-keyword overview response and label it with ``keyword``.
+
+        The response is first read as a flat row. If that fails, the first
+        parseable row under ``keywords`` / ``items`` is used. Presumably the
+        overview tool nests its row that way - confirm against the API
+        reference. Returns ``None`` when no row can be parsed.
+        """
+        entry = self._parse_keyword_item(response, source=source)
+        if entry is None:
+            for row in self._response_rows(response):
+                entry = self._parse_keyword_item(row, source=source)
+                if entry is not None:
+                    break
+        if entry is not None:
+            # Report the keyword exactly as it was requested and keep the
+            # intent label consistent with that text.
+            entry.keyword = keyword
+            entry.intent = self.classify_intent(keyword)
+        return entry
+
+    def _parse_keyword_item(self, item: dict, source: str = "") -> Optional[KeywordEntry]:
+        """
+        Parse one response row into a KeywordEntry.
+
+        Returns ``None`` for rows that are not dicts, are empty, carry an
+        ``"error"`` key, or have no keyword text. Metrics that are missing or
+        not numeric are read as zero instead of raising.
+        """
+        if not isinstance(item, dict) or not item or "error" in item:
+            return None
+
+        keyword = item.get("keyword", item.get("term", item.get("query", "")))
+        if not keyword or not isinstance(keyword, str):
+            return None
+
+        volume = self._to_int(item.get("volume", item.get("search_volume", 0)))
+        # "difficulty" is accepted as one more spelling of the KD field;
+        # presumably some responses use it - confirm against the API reference.
+        kd = self._to_float(item.get("keyword_difficulty", item.get("kd", item.get("difficulty", 0))))
+        cpc = self._to_float(item.get("cpc", item.get("cost_per_click", 0)))
+
+        return KeywordEntry(
+            keyword=keyword,
+            volume=volume,
+            kd=round(kd, 1),
+            cpc=round(cpc, 2),
+            intent=self.classify_intent(keyword),
+            source=source,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Plain-text report formatter
+# ---------------------------------------------------------------------------
+
+
+def format_text_report(result: ResearchResult) -> str:
+    """Render a research result as a plain-text report.
+
+    The report has a banner, an overview section, up to 15 clusters, up to
+    30 keywords (in the order stored on ``result``) and the intent
+    distribution over all keywords. Sections with nothing to show are omitted.
+    """
+    banner = "=" * 70
+    out: list[str] = [
+        banner,
+        f"Keyword Strategy Report: {result.seed_keyword}",
+        f"Country: {result.country.upper()} | Date: {result.timestamp[:10]}",
+        banner,
+        "",
+        "## Overview",
+        f"  Total keywords discovered: {result.total_keywords}",
+        f"  Topic clusters: {len(result.clusters)}",
+        f"  Total search volume: {result.total_volume:,}",
+        "",
+    ]
+
+    # Cluster table
+    if result.clusters:
+        out.append("## Top Clusters")
+        out.append(f"  {'Cluster':<25} {'Keywords':>8} {'Volume':>10} {'Avg KD':>8} {'Intent':<15}")
+        out.append("  " + "-" * 66)
+        out.extend(
+            f"  {group.topic:<25} {len(group.keywords):>8} "
+            f"{group.total_volume:>10,} {group.avg_kd:>8.1f} "
+            f"{group.primary_intent:<15}"
+            for group in result.clusters[:15]
+        )
+        out.append("")
+
+    # Keyword table; long names are cut so the columns stay aligned
+    if result.keywords:
+        out.append("## Top Keywords (by volume)")
+        out.append(f"  {'Keyword':<40} {'Vol':>8} {'KD':>6} {'CPC':>7} {'Intent':<15} {'Cluster':<15}")
+        out.append("  " + "-" * 91)
+        for entry in result.keywords[:30]:
+            name = entry.keyword[:38]
+            topic = entry.cluster[:13]
+            out.append(
+                f"  {name:<40} {entry.volume:>8,} {entry.kd:>6.1f} "
+                f"{entry.cpc:>7.2f} {entry.intent:<15} {topic:<15}"
+            )
+        out.append("")
+
+    # Intent distribution, most common intent first
+    tally: dict[str, int] = {}
+    for entry in result.keywords:
+        tally[entry.intent] = tally.get(entry.intent, 0) + 1
+    if tally:
+        total = len(result.keywords)
+        out.append("## Intent Distribution")
+        for intent, count in sorted(tally.items(), key=lambda pair: -pair[1]):
+            share = count / total * 100
+            out.append(f"  {intent:<15}: {count:>5} ({share:.1f}%)")
+        out.append("")
+
+    return "\n".join(out)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def main():
+    """Command-line entry point.
+
+    Parses the arguments, runs one analysis for ``--keyword``, and emits the
+    result as JSON (``--json``) or as a text report, either to the file given
+    by ``--output`` or to standard output. Returns 0.
+    """
+    parser = argparse.ArgumentParser(
+        description="Keyword Researcher - Expand, classify, and cluster keywords",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python keyword_researcher.py --keyword "치과 임플란트" --country kr --json
+  python keyword_researcher.py --keyword "dental implant" --compare-global --json
+  python keyword_researcher.py --keyword "치과 임플란트" --korean-suffixes --output report.json
+        """,
+    )
+    parser.add_argument("--keyword", required=True, help="Seed keyword to expand and research")
+    parser.add_argument("--country", default="kr", help="Target country code (default: kr)")
+    parser.add_argument(
+        "--korean-suffixes",
+        action="store_true",
+        help="Enable Korean suffix expansion (추천, 가격, 후기, etc.)",
+    )
+    parser.add_argument(
+        "--compare-global",
+        action="store_true",
+        help="Fetch volume-by-country comparison for top keywords",
+    )
+    parser.add_argument("--json", action="store_true", dest="output_json", help="Output results as JSON")
+    parser.add_argument("--output", type=str, default=None, help="Write output to file (path)")
+    parser.add_argument("--verbose", action="store_true", help="Enable verbose/debug logging")
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    # Run the analysis with the options taken from the command line
+    result = KeywordResearcher(
+        country=args.country,
+        korean_suffixes=args.korean_suffixes,
+        compare_global=args.compare_global,
+    ).analyze(args.keyword)
+
+    # Pick the output format
+    output = (
+        json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
+        if args.output_json
+        else format_text_report(result)
+    )
+
+    # No --output path: print and finish
+    if not args.output:
+        print(output)
+        return 0
+
+    with open(args.output, "w", encoding="utf-8") as f:
+        f.write(output)
+    logger.info(f"Output written to: {args.output}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())