Add SEO skills 19-28, 31-32 with full Python implementations
12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
891
custom-skills/20-seo-serp-analysis/code/scripts/serp_analyzer.py
Normal file
891
custom-skills/20-seo-serp-analysis/code/scripts/serp_analyzer.py
Normal file
@@ -0,0 +1,891 @@
|
||||
"""
|
||||
SERP Analyzer - Google SERP feature detection and competitor mapping
|
||||
====================================================================
|
||||
Purpose: Analyze Google SERP features, map competitor positions,
|
||||
classify content types, and score SERP opportunities.
|
||||
Python: 3.10+
|
||||
|
||||
Usage:
|
||||
python serp_analyzer.py --keyword "치과 임플란트" --country kr --json
|
||||
python serp_analyzer.py --keywords-file keywords.txt --country kr --json
|
||||
python serp_analyzer.py --keyword "dental implant" --output serp_report.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Root-logger config for standalone CLI runs; per-class loggers inherit it.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
# Module-level logger used by the CLI helper functions below.
logger = logging.getLogger(__name__)
# Shared rich console for all human-readable report output.
console = Console()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data Classes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class SerpFeatures:
    """Tracks presence and count of Google SERP features.

    Boolean fields flag whether a feature appears on the results page;
    the two ad fields count paid slots above/below the organic results.
    """

    featured_snippet: bool = False
    people_also_ask: bool = False
    local_pack: bool = False
    knowledge_panel: bool = False
    video_carousel: bool = False
    image_pack: bool = False
    site_links: bool = False
    ads_top: int = 0
    ads_bottom: int = 0
    shopping: bool = False

    @property
    def feature_count(self) -> int:
        """Count of boolean features that are present."""
        flags = (
            self.featured_snippet,
            self.people_also_ask,
            self.local_pack,
            self.knowledge_panel,
            self.video_carousel,
            self.image_pack,
            self.site_links,
            self.shopping,
        )
        return sum(1 for flag in flags if flag)

    @property
    def has_ads(self) -> bool:
        """True when any paid slots appear above or below the organic results."""
        return max(self.ads_top, self.ads_bottom) > 0
|
||||
|
||||
|
||||
@dataclass
class CompetitorPosition:
    """A single competitor entry in the SERP.

    One organic (non-ad) result: its rank, URL/domain, and the heuristic
    classifications attached during competitor mapping.
    """

    position: int  # 1-based rank in the SERP
    url: str  # full result URL
    domain: str  # netloc of the URL with "www." stripped
    title: str = ""  # result title as returned by the data source
    content_type: str = "unknown"  # e.g. blog/product/service/news/video/forum/wiki
    is_featured: bool = False  # occupies the featured-snippet slot
    has_sitelinks: bool = False  # result shows expanded sitelinks
    estimated_traffic_share: float = 0.0  # approximate CTR share for this position (0.0-1.0)
|
||||
|
||||
|
||||
@dataclass
class SerpResult:
    """Complete SERP analysis result for a keyword.

    Aggregates keyword metrics, detected SERP features, mapped competitor
    positions, and the derived opportunity/intent/volatility assessments.
    """

    keyword: str
    country: str = "us"  # country code used for the SERP lookup
    search_volume: int = 0
    keyword_difficulty: float = 0.0
    cpc: float = 0.0  # cost-per-click; rendered with a "$" prefix in reports
    serp_features: SerpFeatures = field(default_factory=SerpFeatures)
    competitors: list[CompetitorPosition] = field(default_factory=list)
    opportunity_score: int = 0  # 0-100, higher = better ranking opportunity
    intent_signals: str = "informational"  # dominant inferred search intent
    content_type_distribution: dict[str, int] = field(default_factory=dict)
    volatility: str = "stable"  # one of: stable, moderate, volatile
    timestamp: str = ""  # ISO-8601; auto-filled at construction when empty

    def __post_init__(self):
        # Stamp the analysis time unless the caller supplied one explicitly.
        if not self.timestamp:
            self.timestamp = datetime.now().isoformat()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Content Type Classifiers
# ---------------------------------------------------------------------------

# URL path patterns that hint at content type.
# Regexes are matched against the lower-cased result URL; each matching
# type scores +2 during content-type classification (one hit per type).
URL_CONTENT_PATTERNS: dict[str, list[str]] = {
    "blog": [
        r"/blog/",
        r"/post/",
        r"/article/",
        r"/news/",
        r"/magazine/",
        r"/journal/",
        r"/column/",
        r"/story/",
        r"\d{4}/\d{2}/",  # date-style path segments (e.g. /2024/05/) typical of blogs
    ],
    "product": [
        r"/product/",
        r"/item/",
        r"/shop/",
        r"/store/",
        r"/buy/",
        r"/p/",
        r"/goods/",
        r"/catalog/",
    ],
    "service": [
        r"/service",
        r"/solution",
        r"/treatment",
        r"/procedure",
        r"/pricing",
        r"/consultation",
    ],
    "news": [
        r"/news/",
        r"/press/",
        r"/media/",
        r"/release/",
        r"news\.",  # news.* subdomains
        r"press\.",
    ],
    "video": [
        r"youtube\.com/watch",
        r"youtu\.be/",
        r"vimeo\.com/",
        r"/video/",
        r"/watch/",
    ],
    "forum": [
        r"/forum/",
        r"/community/",
        r"/discuss",
        r"/thread/",
        r"/question/",
        r"/answers/",
    ],
    "wiki": [
        r"wikipedia\.org",
        r"/wiki/",
        r"namu\.wiki",
    ],
}

# Title keywords that hint at content type.
# Matched as case-insensitive substrings of the result title; each hit
# scores +1 during content-type classification. Includes Korean terms
# because the tool targets Korean SERPs as well.
TITLE_CONTENT_PATTERNS: dict[str, list[str]] = {
    "blog": ["블로그", "후기", "리뷰", "review", "guide", "가이드", "팁", "tips"],
    "product": ["구매", "가격", "buy", "price", "shop", "할인", "sale", "최저가"],
    "service": ["상담", "치료", "진료", "병원", "클리닉", "clinic", "treatment"],
    "news": ["뉴스", "속보", "보도", "news", "기사", "report"],
    "video": ["영상", "동영상", "video", "youtube"],
    "comparison": ["비교", "vs", "versus", "compare", "차이", "best"],
}

# CTR distribution by position (approximate click-through rates).
# Positions beyond 10 fall back to 0.01 during competitor mapping.
# NOTE(review): values resemble a typical industry organic-CTR curve;
# the exact source/study is not recorded here — confirm before citing.
CTR_BY_POSITION: dict[int, float] = {
    1: 0.316,
    2: 0.158,
    3: 0.110,
    4: 0.080,
    5: 0.062,
    6: 0.049,
    7: 0.040,
    8: 0.034,
    9: 0.029,
    10: 0.025,
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SERP Analyzer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SerpAnalyzer:
    """Analyzes Google SERP features, competitor positions, and opportunities.

    Only get_serp_data() touches the outside world (the `claude` CLI);
    every other method is pure computation over the returned payload.
    """

    def __init__(self):
        self.logger = logging.getLogger(self.__class__.__name__)

    # ----- Data Fetching -----

    def _call_mcp(self, tool: str, payload: dict[str, Any]) -> dict[str, Any] | None:
        """Invoke a single Ahrefs MCP tool via the `claude` CLI.

        Returns the parsed JSON response, or None when the CLI is missing,
        the call times out, exits non-zero, produces no output, or emits
        unparsable JSON.
        """
        cmd = ["claude", "mcp", "call", "ahrefs", tool, json.dumps(payload)]
        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=60,
            )
            if proc.returncode == 0 and proc.stdout.strip():
                return json.loads(proc.stdout)
        except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError) as exc:
            self.logger.warning(f"MCP tool '{tool}' unavailable ({exc})")
        return None

    def get_serp_data(self, keyword: str, country: str = "us") -> dict[str, Any]:
        """
        Fetch SERP data via Ahrefs serp-overview MCP tool.

        Tries `serp-overview` first, then falls back to
        `keywords-explorer-overview`. Returns an empty result structure
        when neither MCP tool is available (e.g. in standalone / CI
        environments).
        """
        self.logger.info(f"Fetching SERP data for '{keyword}' (country={country})")
        payload = {"keyword": keyword, "country": country}

        data = self._call_mcp("serp-overview", payload)
        if data is not None:
            self.logger.info("Successfully fetched SERP data via MCP")
            return data

        # Fallback: the keyword-level overview still carries metrics and
        # structured serp_features even without per-result SERP items.
        data = self._call_mcp("keywords-explorer-overview", payload)
        if data is not None:
            self.logger.info("Fetched keyword overview via MCP")
            return data

        # Last resort: empty structure so downstream analysis degrades
        # gracefully instead of crashing.
        self.logger.warning(
            "No MCP data source available. Run inside Claude Desktop "
            "or provide data via --input flag."
        )
        return {
            "keyword": keyword,
            "country": country,
            "serp": [],
            "serp_features": {},
            "metrics": {},
        }

    # ----- Feature Detection -----

    def detect_features(self, serp_data: dict[str, Any]) -> SerpFeatures:
        """
        Identify SERP features from Ahrefs response data.

        Handles both the structured 'serp_features' dict returned by
        keywords-explorer-overview and the raw SERP items list from
        serp-overview. Structured values are coerced with bool() so that
        providers reporting counts/objects for feature presence still
        populate the bool-typed SerpFeatures fields correctly.
        """
        features = SerpFeatures()

        # -- Method 1: structured serp_features from Ahrefs --
        sf = serp_data.get("serp_features", {})
        if isinstance(sf, dict):
            features.featured_snippet = bool(sf.get("featured_snippet", False))
            features.people_also_ask = bool(sf.get("people_also_ask", False))
            features.local_pack = bool(sf.get("local_pack", False))
            features.knowledge_panel = bool(
                sf.get("knowledge_panel", False) or sf.get("knowledge_graph", False)
            )
            features.video_carousel = bool(
                sf.get("video", False) or sf.get("video_carousel", False)
            )
            features.image_pack = bool(
                sf.get("image_pack", False) or sf.get("images", False)
            )
            features.site_links = bool(
                sf.get("sitelinks", False) or sf.get("site_links", False)
            )
            features.shopping = bool(
                sf.get("shopping_results", False) or sf.get("shopping", False)
            )
            features.ads_top = int(sf.get("ads_top", 0) or 0)
            features.ads_bottom = int(sf.get("ads_bottom", 0) or 0)

        # -- Method 2: infer from raw SERP items list --
        serp_items = serp_data.get("serp", [])
        if isinstance(serp_items, list):
            for item in serp_items:
                item_type = str(item.get("type", "")).lower()
                if "featured_snippet" in item_type or item.get("is_featured"):
                    features.featured_snippet = True
                if "people_also_ask" in item_type or "paa" in item_type:
                    features.people_also_ask = True
                if "local" in item_type or "map" in item_type:
                    features.local_pack = True
                if "knowledge" in item_type:
                    features.knowledge_panel = True
                if "video" in item_type:
                    features.video_carousel = True
                if "image" in item_type:
                    features.image_pack = True
                if item.get("sitelinks"):
                    features.site_links = True
                if "shopping" in item_type:
                    features.shopping = True
                # NOTE(review): substring match also hits any type string
                # merely containing "ad" — confirm against the Ahrefs type
                # vocabulary before tightening.
                if "ad" in item_type:
                    # Positions 1-4 are treated as top-of-page ad slots.
                    pos = item.get("position", 0)
                    if pos <= 4:
                        features.ads_top += 1
                    else:
                        features.ads_bottom += 1

        return features

    # ----- Competitor Mapping -----

    def map_competitors(self, serp_data: dict[str, Any]) -> list[CompetitorPosition]:
        """Extract competitor positions and domains from SERP data.

        Ads and URL-less entries are skipped; results are returned sorted
        by position with an estimated CTR-based traffic share attached.
        """
        competitors: list[CompetitorPosition] = []
        serp_items = serp_data.get("serp", [])

        if not isinstance(serp_items, list):
            return competitors

        for item in serp_items:
            url = item.get("url", "")
            if not url:
                continue

            # Skip ads for organic mapping
            item_type = str(item.get("type", "")).lower()
            if "ad" in item_type:
                continue

            parsed = urlparse(url)
            # removeprefix (not replace): only a *leading* "www." should be
            # stripped, never an interior occurrence of "www.".
            domain = parsed.netloc.removeprefix("www.")
            position = int(item.get("position", len(competitors) + 1))
            title = item.get("title", "")

            content_type = self.classify_content_type(item)
            # Positions beyond the CTR table get a small 1% floor.
            traffic_share = CTR_BY_POSITION.get(position, 0.01)

            competitors.append(
                CompetitorPosition(
                    position=position,
                    url=url,
                    domain=domain,
                    title=title,
                    content_type=content_type,
                    is_featured=bool(item.get("is_featured")),
                    has_sitelinks=bool(item.get("sitelinks")),
                    estimated_traffic_share=round(traffic_share, 4),
                )
            )

        # Sort by position
        competitors.sort(key=lambda c: c.position)
        return competitors

    # ----- Content Type Classification -----

    def classify_content_type(self, result: dict[str, Any]) -> str:
        """
        Classify a SERP result as blog/product/service/news/video/forum/wiki
        based on URL patterns and title keywords.

        URL pattern hits score +2 (at most one per type); title keyword
        hits score +1 each. The highest-scoring type wins.
        """
        url = result.get("url", "").lower()
        title = result.get("title", "").lower()

        scores: dict[str, int] = {}

        # Score from URL patterns
        for ctype, patterns in URL_CONTENT_PATTERNS.items():
            for pattern in patterns:
                if re.search(pattern, url):
                    scores[ctype] = scores.get(ctype, 0) + 2
                    break

        # Score from title patterns
        for ctype, keywords in TITLE_CONTENT_PATTERNS.items():
            for kw in keywords:
                if kw.lower() in title:
                    scores[ctype] = scores.get(ctype, 0) + 1

        if not scores:
            # Heuristic: if domain is a known authority site
            parsed = urlparse(url)
            domain = parsed.netloc.lower()
            if any(d in domain for d in ["wikipedia", "namu.wiki", "나무위키"]):
                return "wiki"
            if any(d in domain for d in ["youtube", "vimeo"]):
                return "video"
            if any(d in domain for d in ["naver.com", "tistory.com", "brunch.co.kr"]):
                return "blog"
            # NOTE(review): this fallback label is "service_page", which
            # validate_intent() never matches (it checks "service") — confirm
            # whether the distinct label is intentional before unifying.
            return "service_page"

        # Return highest scoring type
        return max(scores, key=scores.get)  # type: ignore[arg-type]

    # ----- Opportunity Scoring -----

    def calculate_opportunity_score(
        self,
        features: SerpFeatures,
        positions: list[CompetitorPosition],
    ) -> int:
        """
        Score SERP opportunity from 0-100.

        Higher scores indicate better opportunity to rank or gain features.
        Starts from a base of 50, adds positive signals (capturable
        features, low ad load, diverse/blog-heavy competition), subtracts
        negative ones (knowledge panel, heavy ads, single-domain
        dominance), and clamps the result to [0, 100].
        """
        score = 50  # Base score

        # -- Positive signals --
        if features.featured_snippet:
            score += 15  # snippet exists and could be captured
        if features.people_also_ask:
            score += 10  # related-question opportunity
        if not features.knowledge_panel:
            score += 10  # less SERP real-estate taken
        if features.ads_top <= 1:
            score += 10  # strong organic visibility
        elif features.ads_top <= 2:
            score += 5
        if not features.video_carousel:
            score += 5  # opportunity to add video
        if not features.image_pack:
            score += 5  # image SEO opportunity
        if not features.shopping:
            score += 5

        # Domain diversity in top 10
        if positions:
            top10_domains = {p.domain for p in positions[:10]}
            if len(top10_domains) >= 8:
                score += 10
            elif len(top10_domains) >= 5:
                score += 5

        # Blog-heavy top results (easier to compete)
        blog_count = sum(1 for p in positions[:5] if p.content_type == "blog")
        if blog_count >= 3:
            score += 10
        elif blog_count >= 2:
            score += 5

        # Sitelinks reduce available space
        sitelink_count = sum(1 for p in positions[:5] if p.has_sitelinks)
        if sitelink_count <= 1:
            score += 5

        # Single domain dominance penalty
        domain_counts: dict[str, int] = {}
        for p in positions[:5]:
            domain_counts[p.domain] = domain_counts.get(p.domain, 0) + 1
        if any(c >= 3 for c in domain_counts.values()):
            score -= 10

        # -- Negative signals --
        if features.knowledge_panel:
            score -= 15
        if features.ads_top >= 4:
            score -= 10
        elif features.ads_top >= 3:
            score -= 5

        # Clamp to 0-100
        return max(0, min(100, score))

    # ----- Intent Validation -----

    def validate_intent(
        self,
        features: SerpFeatures,
        positions: list[CompetitorPosition],
    ) -> str:
        """
        Infer search intent from SERP composition.

        Accumulates weighted votes from SERP features and the content
        types of the top results, then returns the strongest signal.

        Returns one of: informational, navigational, commercial,
        transactional, local
        """
        signals: dict[str, int] = {
            "informational": 0,
            "navigational": 0,
            "commercial": 0,
            "transactional": 0,
            "local": 0,
        }

        # Feature-based signals
        if features.featured_snippet:
            signals["informational"] += 3
        if features.people_also_ask:
            signals["informational"] += 2
        if features.knowledge_panel:
            # Knowledge panels appear for both research and brand lookups.
            signals["informational"] += 2
            signals["navigational"] += 2
        if features.local_pack:
            signals["local"] += 5
        if features.shopping:
            signals["transactional"] += 4
        if features.has_ads:
            signals["commercial"] += 2
            signals["transactional"] += 1
        if features.ads_top >= 3:
            signals["transactional"] += 2
        if features.image_pack:
            signals["informational"] += 1
        if features.video_carousel:
            signals["informational"] += 1

        # Content type signals from top results
        for pos in positions[:10]:
            ct = pos.content_type
            if ct == "blog":
                signals["informational"] += 1
            elif ct == "product":
                signals["transactional"] += 2
            elif ct == "service":
                signals["commercial"] += 1
            elif ct == "news":
                signals["informational"] += 1
            elif ct == "video":
                signals["informational"] += 1
            elif ct == "wiki":
                signals["informational"] += 2
            elif ct == "forum":
                signals["informational"] += 1
            elif ct == "comparison":
                signals["commercial"] += 2

        # Navigational: single domain dominates top 3
        if positions:
            top3_domains = [p.domain for p in positions[:3]]
            if len(set(top3_domains)) == 1:
                signals["navigational"] += 5

        # Return highest signal
        return max(signals, key=signals.get)  # type: ignore[arg-type]

    # ----- Content Type Distribution -----

    def _content_type_distribution(
        self, positions: list[CompetitorPosition]
    ) -> dict[str, int]:
        """Count content types across the top 10 organic results.

        Returned dict is ordered by descending count for readable reports.
        """
        dist: dict[str, int] = {}
        for p in positions[:10]:
            dist[p.content_type] = dist.get(p.content_type, 0) + 1
        return dict(sorted(dist.items(), key=lambda x: x[1], reverse=True))

    # ----- Volatility Assessment -----

    def _assess_volatility(self, serp_data: dict[str, Any]) -> str:
        """
        Assess SERP volatility based on available signals.

        Prefers an explicit provider volatility score; otherwise uses a
        heuristic on how many top-10 results were last seen in the past
        30 days.

        Returns: stable, moderate, volatile
        """
        # Check if Ahrefs provides a volatility/movement score
        metrics = serp_data.get("metrics", {})
        if isinstance(metrics, dict):
            volatility_score = metrics.get("serp_volatility", None)
            if volatility_score is not None:
                if volatility_score < 3:
                    return "stable"
                elif volatility_score < 7:
                    return "moderate"
                else:
                    return "volatile"

        # Heuristic: if many results have recent dates, SERP is more volatile
        serp_items = serp_data.get("serp", [])
        if isinstance(serp_items, list) and serp_items:
            recent_count = 0
            for item in serp_items[:10]:
                last_seen = item.get("last_seen", "")
                if last_seen:
                    try:
                        dt = datetime.fromisoformat(last_seen.replace("Z", "+00:00"))
                        # Compare in the timestamp's own zone (naive stays naive).
                        if (datetime.now(dt.tzinfo) - dt).days < 30:
                            recent_count += 1
                    except (ValueError, TypeError):
                        pass
            if recent_count >= 5:
                return "volatile"
            elif recent_count >= 3:
                return "moderate"

        return "stable"

    # ----- Main Analysis Orchestrator -----

    def analyze(self, keyword: str, country: str = "us") -> SerpResult:
        """
        Orchestrate full SERP analysis for a single keyword.

        Steps:
        1. Fetch SERP data from Ahrefs MCP
        2. Detect SERP features
        3. Map competitor positions
        4. Classify content types
        5. Calculate opportunity score
        6. Validate search intent
        7. Assess volatility
        """
        serp_data = self.get_serp_data(keyword, country)

        features = self.detect_features(serp_data)
        positions = self.map_competitors(serp_data)
        opportunity = self.calculate_opportunity_score(features, positions)
        intent = self.validate_intent(features, positions)
        content_dist = self._content_type_distribution(positions)
        volatility = self._assess_volatility(serp_data)

        # Extract keyword metrics if available
        metrics = serp_data.get("metrics", {})
        search_volume = int(metrics.get("search_volume", 0) or 0)
        keyword_difficulty = float(metrics.get("keyword_difficulty", 0) or 0)
        cpc = float(metrics.get("cpc", 0) or 0)

        return SerpResult(
            keyword=keyword,
            country=country,
            search_volume=search_volume,
            keyword_difficulty=keyword_difficulty,
            cpc=cpc,
            serp_features=features,
            competitors=positions,
            opportunity_score=opportunity,
            intent_signals=intent,
            content_type_distribution=content_dist,
            volatility=volatility,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Output Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def result_to_dict(result: SerpResult) -> dict[str, Any]:
    """Convert SerpResult to a JSON-serializable dictionary.

    dataclasses.asdict recurses into the nested SerpFeatures and
    CompetitorPosition dataclasses, yielding plain dicts/lists.
    """
    return asdict(result)
|
||||
|
||||
|
||||
def print_rich_report(result: SerpResult) -> None:
    """Print a human-readable report using rich.

    Sections: header, keyword metrics (when present), SERP features,
    top competitors, content-type distribution, and a summary footer.
    """
    console.rule(f"[bold blue]SERP Analysis: {result.keyword}")
    console.print(f"[dim]Country: {result.country} | Timestamp: {result.timestamp}[/dim]")
    console.print()

    def emit(table: Table) -> None:
        # Print a table followed by a separating blank line.
        console.print(table)
        console.print()

    # Keyword metrics (only when the data source supplied any)
    if result.search_volume or result.keyword_difficulty:
        metrics_table = Table(title="Keyword Metrics", show_lines=True)
        metrics_table.add_column("Metric", style="cyan")
        metrics_table.add_column("Value", style="green")
        for label, rendered in (
            ("Search Volume", f"{result.search_volume:,}"),
            ("Keyword Difficulty", f"{result.keyword_difficulty:.1f}"),
            ("CPC", f"${result.cpc:.2f}"),
        ):
            metrics_table.add_row(label, rendered)
        emit(metrics_table)

    # SERP feature presence
    feat = result.serp_features
    feat_table = Table(title="SERP Features", show_lines=True)
    feat_table.add_column("Feature", style="cyan")
    feat_table.add_column("Present", style="green")
    for label, rendered in (
        ("Featured Snippet", _bool_icon(feat.featured_snippet)),
        ("People Also Ask", _bool_icon(feat.people_also_ask)),
        ("Local Pack", _bool_icon(feat.local_pack)),
        ("Knowledge Panel", _bool_icon(feat.knowledge_panel)),
        ("Video Carousel", _bool_icon(feat.video_carousel)),
        ("Image Pack", _bool_icon(feat.image_pack)),
        ("Site Links", _bool_icon(feat.site_links)),
        ("Shopping", _bool_icon(feat.shopping)),
        ("Ads (top)", str(feat.ads_top)),
        ("Ads (bottom)", str(feat.ads_bottom)),
    ):
        feat_table.add_row(label, rendered)
    emit(feat_table)

    # Top-10 organic competitors
    if result.competitors:
        comp_table = Table(title="Top Competitors", show_lines=True)
        for header, style in (
            ("#", "bold"),
            ("Domain", "cyan"),
            ("Type", "magenta"),
            ("CTR Share", "green"),
            ("Featured", "yellow"),
        ):
            comp_table.add_column(header, style=style)
        for comp in result.competitors[:10]:
            comp_table.add_row(
                str(comp.position),
                comp.domain,
                comp.content_type,
                f"{comp.estimated_traffic_share:.1%}",
                _bool_icon(comp.is_featured),
            )
        emit(comp_table)

    # Content-type mix across the top 10 results
    if result.content_type_distribution:
        dist_table = Table(title="Content Type Distribution (Top 10)", show_lines=True)
        dist_table.add_column("Content Type", style="cyan")
        dist_table.add_column("Count", style="green")
        for ctype, count in result.content_type_distribution.items():
            dist_table.add_row(ctype, str(count))
        emit(dist_table)

    # Summary footer with a traffic-light color for the opportunity score
    if result.opportunity_score >= 60:
        opp_color = "green"
    elif result.opportunity_score >= 40:
        opp_color = "yellow"
    else:
        opp_color = "red"
    console.print(f"Opportunity Score: [{opp_color}]{result.opportunity_score}/100[/{opp_color}]")
    console.print(f"Search Intent: [bold]{result.intent_signals}[/bold]")
    console.print(f"SERP Volatility: [bold]{result.volatility}[/bold]")
    console.rule()
|
||||
|
||||
|
||||
def _bool_icon(val: bool) -> str:
|
||||
"""Return Yes/No string for boolean values."""
|
||||
return "Yes" if val else "No"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser.

    Exactly one of --keyword / --keywords-file is required; --country,
    --json and --output are optional modifiers.
    """
    parser = argparse.ArgumentParser(
        description="Google SERP feature detection and competitor mapping",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python serp_analyzer.py --keyword "치과 임플란트" --country kr --json
  python serp_analyzer.py --keywords-file keywords.txt --country kr --output report.json
""",
    )

    # Keyword source: single keyword XOR a file of keywords.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--keyword", type=str, help="Single keyword to analyze")
    source.add_argument(
        "--keywords-file",
        type=str,
        help="Path to file with one keyword per line",
    )

    parser.add_argument(
        "--country",
        type=str,
        default="us",
        help="Country code for SERP (default: us)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_output",
        help="Output results as JSON",
    )
    parser.add_argument("--output", type=str, help="Write JSON results to file")
    return parser
|
||||
|
||||
|
||||
def load_keywords(filepath: str) -> list[str]:
    """Load keywords from a text file, one per line.

    Blank lines and lines starting with '#' are skipped. Exits the
    process with status 1 when the file does not exist.
    """
    path = Path(filepath)
    if not path.exists():
        logger.error(f"Keywords file not found: {filepath}")
        sys.exit(1)

    stripped = (raw.strip() for raw in path.read_text(encoding="utf-8").splitlines())
    keywords = [kw for kw in stripped if kw and not kw.startswith("#")]

    logger.info(f"Loaded {len(keywords)} keywords from {filepath}")
    return keywords
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: parse args, analyze each keyword, emit reports.

    Human-readable reports go to the console unless --json/--output is
    given, in which case results are collected and emitted as JSON.
    """
    args = build_parser().parse_args()
    analyzer = SerpAnalyzer()

    # Collect keywords from either the single flag or the file
    keywords = [args.keyword] if args.keyword else load_keywords(args.keywords_file)
    if not keywords:
        logger.error("No keywords to analyze")
        sys.exit(1)

    collect_json = bool(args.json_output or args.output)
    results: list[dict[str, Any]] = []

    for kw in keywords:
        console.print(f"\n[bold]Analyzing:[/bold] {kw}")
        analysis = analyzer.analyze(kw, args.country)
        if collect_json:
            results.append(result_to_dict(analysis))
        else:
            print_rich_report(analysis)

    if collect_json:
        # Single keyword unwraps to a bare object; multiple stay a list.
        output_data = results[0] if len(results) == 1 else results
        if args.json_output:
            print(json.dumps(output_data, ensure_ascii=False, indent=2))
        if args.output:
            output_path = Path(args.output)
            with open(output_path, "w", encoding="utf-8") as fh:
                json.dump(output_data, fh, ensure_ascii=False, indent=2)
            logger.info(f"Results written to {output_path}")
|
||||
|
||||
|
||||
# Allow running as a standalone script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user