Add SEO skills 19-28, 31-32 with full Python implementations
12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,694 @@
|
||||
"""
|
||||
Content Gap Analyzer - Topic Gap Detection & Cluster Mapping
|
||||
=============================================================
|
||||
Purpose: Identify content gaps vs competitors, build topic clusters,
|
||||
and generate prioritized editorial calendars.
|
||||
Python: 3.10+
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.cluster import AgglomerativeClustering
|
||||
|
||||
from base_client import BaseAsyncClient, config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data classes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class TopicGap:
    """A topic present in competitors but missing from target."""
    # Normalized (lowercased, stripped) keyword/topic string.
    topic: str
    # Competitor page URLs that cover this topic.
    competitor_urls: list[str] = field(default_factory=list)
    # Keywords observed for those competitor pages.
    competitor_keywords: list[str] = field(default_factory=list)
    # Maximum traffic seen across the competitor pages for this topic.
    estimated_traffic: int = 0
    # 0-100 blend: 60% traffic score + 40% competitor-coverage score.
    priority_score: float = 0.0
    # "low" | "medium" | "high", derived from how many competitor pages cover it.
    difficulty: str = "medium"
    # Suggested format: "guide", "listicle", "informational", "landing", or "blog".
    content_type_suggestion: str = "blog"
|
||||
|
||||
|
||||
@dataclass
class TopicCluster:
    """Topic cluster with pillar and supporting cluster pages."""
    # Broadest topic in the cluster (longest-string heuristic in build_topic_clusters).
    pillar_topic: str
    # Set equal to pillar_topic by build_topic_clusters.
    pillar_keyword: str = ""
    # Up to 20 supporting (non-pillar) topics.
    cluster_topics: list[str] = field(default_factory=list)
    # Mirrors cluster_topics (separate list with the same contents).
    cluster_keywords: list[str] = field(default_factory=list)
    # Reserved; currently always 0 — search-volume data not wired up yet.
    total_volume: int = 0
    # Reserved; currently always 0.0.
    coverage_score: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class CalendarEntry:
    """Prioritized editorial calendar entry."""
    # Topic/keyword the content piece targets.
    topic: str
    # "high" | "medium" | "low", from the gap's priority_score thresholds.
    priority: str = "medium"
    # Planned publish date, "YYYY-MM-DD".
    target_date: str = ""
    # Content format (guide/listicle/informational/landing/blog).
    content_type: str = "blog"
    # Suggested length, looked up per content_type (default 1500).
    target_word_count: int = 1500
    # Primary keyword; set to the gap topic itself.
    primary_keyword: str = ""
    # Traffic estimate carried over from the TopicGap.
    estimated_traffic: int = 0
    # Pillar topic of the owning cluster, or "uncategorized".
    cluster_name: str = ""
    # Free-form notes (not populated by generate_calendar).
    notes: str = ""
|
||||
|
||||
|
||||
@dataclass
class ContentGapResult:
    """Full content gap analysis result.

    Aggregates everything `ContentGapAnalyzer.analyze` produces: the raw gaps,
    optional topic clusters, the generated editorial calendar, Korean-market
    opportunities, volume comparison, and textual recommendations.
    """
    # Target website URL the analysis was run for.
    target_url: str
    # Competitor URLs that were compared against.
    competitor_urls: list[str] = field(default_factory=list)
    # ISO-8601 timestamp of when the analysis started.
    timestamp: str = ""
    # Number of keywords the target already ranks for.
    target_topics_count: int = 0
    # Total competitor top pages seen across all competitors.
    competitor_topics_count: int = 0
    gaps: list[TopicGap] = field(default_factory=list)
    clusters: list[TopicCluster] = field(default_factory=list)
    calendar: list[CalendarEntry] = field(default_factory=list)
    # Per-site topic counts, keyed by URL (target included).
    content_volume_comparison: dict[str, int] = field(default_factory=dict)
    # BUG FIX: was field(default_factory=dict), which gave this list-typed
    # field an empty *dict* as its default — wrong container for the declared
    # type, and anything iterating/appending to the default would misbehave.
    korean_opportunities: list[dict[str, Any]] = field(default_factory=list)
    recommendations: list[str] = field(default_factory=list)
    errors: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Korean opportunity patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Regex patterns flagging high-intent Korean search content types.
# Each entry: "pattern" — regex matched against gap topics with re.IGNORECASE;
# "label" — machine-readable opportunity key; "description" — human-readable
# Korean label surfaced in reports.
KOREAN_OPPORTUNITY_PATTERNS: list[dict[str, str]] = [
    {"pattern": r"후기|리뷰", "label": "review_content", "description": "후기/리뷰 콘텐츠"},
    {"pattern": r"비용|가격|견적", "label": "pricing_content", "description": "비용/가격 정보 콘텐츠"},
    {"pattern": r"비교|차이", "label": "comparison_content", "description": "비교 콘텐츠"},
    {"pattern": r"추천|베스트|TOP", "label": "recommendation_content", "description": "추천/리스트 콘텐츠"},
    {"pattern": r"방법|하는\s*법|가이드", "label": "how_to_content", "description": "가이드/방법 콘텐츠"},
    {"pattern": r"부작용|주의|위험", "label": "safety_content", "description": "안전/부작용 정보"},
    {"pattern": r"효과|결과|전후", "label": "results_content", "description": "효과/결과 콘텐츠"},
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ContentGapAnalyzer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ContentGapAnalyzer(BaseAsyncClient):
|
||||
"""Analyze content gaps between target and competitor sites."""
|
||||
|
||||
    def __init__(self, max_concurrent: int = 5, requests_per_second: float = 2.0):
        """Initialize the analyzer.

        Args:
            max_concurrent: Maximum simultaneous requests (forwarded to BaseAsyncClient).
            requests_per_second: Request-rate ceiling (forwarded to BaseAsyncClient).
        """
        super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Ahrefs data retrieval
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def get_competitor_topics(self, competitor_url: str, limit: int = 100) -> list[dict]:
|
||||
"""
|
||||
Get top pages and keywords for a competitor via Ahrefs.
|
||||
|
||||
Returns list of dicts: url, traffic, keywords, top_keyword, title.
|
||||
"""
|
||||
self.logger.info(f"Fetching competitor topics for {competitor_url}")
|
||||
target = urlparse(competitor_url).netloc or competitor_url
|
||||
|
||||
try:
|
||||
api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
|
||||
if not api_key:
|
||||
self.logger.warning("AHREFS_API_KEY not set; returning empty competitor topics")
|
||||
return []
|
||||
|
||||
resp = requests.get(
|
||||
"https://api.ahrefs.com/v3/site-explorer/top-pages",
|
||||
params={
|
||||
"target": target,
|
||||
"limit": limit,
|
||||
"select": "url,traffic,keywords,value,top_keyword",
|
||||
},
|
||||
headers={"Authorization": f"Bearer {api_key}"},
|
||||
timeout=30,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
pages = data.get("pages", data.get("items", []))
|
||||
self.logger.info(f"Retrieved {len(pages)} competitor topics from {competitor_url}")
|
||||
return pages
|
||||
except Exception as exc:
|
||||
self.logger.warning(f"Failed to get competitor topics for {competitor_url}: {exc}")
|
||||
return []
|
||||
|
||||
async def get_target_keywords(self, target_url: str, limit: int = 200) -> set[str]:
|
||||
"""Get the set of keywords the target site already ranks for."""
|
||||
self.logger.info(f"Fetching target keywords for {target_url}")
|
||||
target = urlparse(target_url).netloc or target_url
|
||||
|
||||
try:
|
||||
api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
|
||||
if not api_key:
|
||||
return set()
|
||||
|
||||
resp = requests.get(
|
||||
"https://api.ahrefs.com/v3/site-explorer/organic-keywords",
|
||||
params={"target": target, "limit": limit, "select": "keyword,position,traffic"},
|
||||
headers={"Authorization": f"Bearer {api_key}"},
|
||||
timeout=30,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
keywords = data.get("keywords", data.get("items", []))
|
||||
return {kw.get("keyword", "").lower() for kw in keywords if kw.get("keyword")}
|
||||
except Exception as exc:
|
||||
self.logger.warning(f"Failed to get target keywords: {exc}")
|
||||
return set()
|
||||
|
||||
async def get_organic_competitors(self, target_url: str, limit: int = 10) -> list[str]:
|
||||
"""Discover organic competitors via Ahrefs."""
|
||||
self.logger.info(f"Discovering organic competitors for {target_url}")
|
||||
target = urlparse(target_url).netloc or target_url
|
||||
|
||||
try:
|
||||
api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
|
||||
if not api_key:
|
||||
return []
|
||||
|
||||
resp = requests.get(
|
||||
"https://api.ahrefs.com/v3/site-explorer/organic-competitors",
|
||||
params={"target": target, "limit": limit},
|
||||
headers={"Authorization": f"Bearer {api_key}"},
|
||||
timeout=30,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
competitors = data.get("competitors", data.get("items", []))
|
||||
return [c.get("domain", "") for c in competitors if c.get("domain")]
|
||||
except Exception as exc:
|
||||
self.logger.warning(f"Failed to discover competitors: {exc}")
|
||||
return []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Gap analysis
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
    async def find_topic_gaps(
        self,
        target_url: str,
        competitor_urls: list[str],
    ) -> tuple[list[TopicGap], set[str], dict[str, int]]:
        """
        Identify topics covered by competitors but missing from target.

        Fetches the target's keyword set and each competitor's top pages
        (concurrently), then keeps every competitor top_keyword that the
        target does not cover exactly or fuzzily. Gaps are scored 0-100
        (60% traffic, 40% competitor coverage) and returned sorted by
        priority, highest first.

        Returns:
            - List of TopicGap objects.
            - Set of target keywords (for reference).
            - Content volume comparison dict.
        """
        # Gather target keywords
        target_keywords = await self.get_target_keywords(target_url)

        # Gather competitor data in parallel
        # return_exceptions=True so one failing competitor doesn't abort the rest.
        competitor_tasks = [self.get_competitor_topics(c_url) for c_url in competitor_urls]
        competitor_results = await asyncio.gather(*competitor_tasks, return_exceptions=True)

        # Build competitor topic map
        # Keyed by top_keyword so the same topic across competitors merges into one gap.
        competitor_topic_map: dict[str, TopicGap] = {}
        # NOTE: target is counted in *keywords*, competitors in *top pages* — the
        # comparison is indicative, not apples-to-apples.
        content_volume: dict[str, int] = {target_url: len(target_keywords)}

        for c_url, c_result in zip(competitor_urls, competitor_results):
            if isinstance(c_result, Exception):
                self.logger.warning(f"Error fetching {c_url}: {c_result}")
                continue

            pages = c_result if isinstance(c_result, list) else []
            content_volume[c_url] = len(pages)

            for page in pages:
                top_keyword = page.get("top_keyword", "").strip().lower()
                if not top_keyword:
                    continue

                # Skip if target already covers this keyword
                if top_keyword in target_keywords:
                    continue

                # Check for fuzzy matches (keyword contained in target set)
                # len(tk) > 3 avoids trivially-short keywords matching everything.
                is_covered = any(
                    top_keyword in tk or tk in top_keyword
                    for tk in target_keywords
                    if len(tk) > 3
                )
                if is_covered:
                    continue

                if top_keyword not in competitor_topic_map:
                    competitor_topic_map[top_keyword] = TopicGap(
                        topic=top_keyword,
                        estimated_traffic=int(page.get("traffic", 0)),
                    )

                gap = competitor_topic_map[top_keyword]
                gap.competitor_urls.append(page.get("url", c_url))
                gap.competitor_keywords.append(top_keyword)
                # Keep the best (max) traffic figure seen across competitor pages.
                gap.estimated_traffic = max(gap.estimated_traffic, int(page.get("traffic", 0)))

        # Score gaps
        gaps = list(competitor_topic_map.values())
        for gap in gaps:
            competitor_count = len(set(gap.competitor_urls))
            # Log-scaled traffic: 10k+ visits saturates at 100.
            traffic_score = min(100, math.log10(max(gap.estimated_traffic, 1)) / math.log10(10000) * 100)
            # NOTE(review): counts distinct *pages*, not distinct competitor
            # domains, as a share of the competitor-URL count — confirm intent.
            competition_score = (competitor_count / max(len(competitor_urls), 1)) * 100
            gap.priority_score = round((traffic_score * 0.6) + (competition_score * 0.4), 1)

            # Difficulty estimation
            if competitor_count >= 3:
                gap.difficulty = "high"
            elif competitor_count >= 2:
                gap.difficulty = "medium"
            else:
                gap.difficulty = "low"

            # Content type suggestion
            gap.content_type_suggestion = self._suggest_content_type(gap.topic)

        gaps.sort(key=lambda g: g.priority_score, reverse=True)
        return gaps, target_keywords, content_volume
|
||||
|
||||
@staticmethod
|
||||
def _suggest_content_type(topic: str) -> str:
|
||||
"""Suggest content type based on topic keywords."""
|
||||
topic_lower = topic.lower()
|
||||
if any(w in topic_lower for w in ["how to", "guide", "tutorial", "방법", "가이드"]):
|
||||
return "guide"
|
||||
if any(w in topic_lower for w in ["best", "top", "review", "추천", "후기", "비교"]):
|
||||
return "listicle"
|
||||
if any(w in topic_lower for w in ["what is", "이란", "뜻", "의미"]):
|
||||
return "informational"
|
||||
if any(w in topic_lower for w in ["cost", "price", "비용", "가격"]):
|
||||
return "landing"
|
||||
return "blog"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Topic cluster mapping
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def build_topic_clusters(
|
||||
self,
|
||||
topics: list[str],
|
||||
n_clusters: int | None = None,
|
||||
min_cluster_size: int = 3,
|
||||
) -> list[TopicCluster]:
|
||||
"""
|
||||
Group topics into pillar/cluster structure using TF-IDF + hierarchical clustering.
|
||||
|
||||
Args:
|
||||
topics: List of topic strings.
|
||||
n_clusters: Number of clusters (auto-detected if None).
|
||||
min_cluster_size: Minimum topics per cluster.
|
||||
|
||||
Returns:
|
||||
List of TopicCluster objects.
|
||||
"""
|
||||
if len(topics) < min_cluster_size:
|
||||
self.logger.warning("Too few topics for clustering")
|
||||
return []
|
||||
|
||||
# Vectorize topics
|
||||
vectorizer = TfidfVectorizer(
|
||||
max_features=500,
|
||||
stop_words="english",
|
||||
ngram_range=(1, 2),
|
||||
)
|
||||
|
||||
try:
|
||||
tfidf_matrix = vectorizer.fit_transform(topics)
|
||||
except ValueError as exc:
|
||||
self.logger.warning(f"TF-IDF vectorization failed: {exc}")
|
||||
return []
|
||||
|
||||
# Auto-detect cluster count
|
||||
if n_clusters is None:
|
||||
n_clusters = max(2, min(len(topics) // 5, 15))
|
||||
|
||||
n_clusters = min(n_clusters, len(topics) - 1)
|
||||
|
||||
# Hierarchical clustering
|
||||
clustering = AgglomerativeClustering(
|
||||
n_clusters=n_clusters,
|
||||
metric="cosine",
|
||||
linkage="average",
|
||||
)
|
||||
labels = clustering.fit_predict(tfidf_matrix.toarray())
|
||||
|
||||
# Build cluster objects
|
||||
cluster_map: dict[int, list[str]] = defaultdict(list)
|
||||
for topic, label in zip(topics, labels):
|
||||
cluster_map[label].append(topic)
|
||||
|
||||
clusters: list[TopicCluster] = []
|
||||
for label, cluster_topics in sorted(cluster_map.items()):
|
||||
if len(cluster_topics) < min_cluster_size:
|
||||
continue
|
||||
|
||||
# Pick the longest topic as pillar (usually broader)
|
||||
pillar = max(cluster_topics, key=len)
|
||||
subtopics = [t for t in cluster_topics if t != pillar]
|
||||
|
||||
cluster = TopicCluster(
|
||||
pillar_topic=pillar,
|
||||
pillar_keyword=pillar,
|
||||
cluster_topics=subtopics[:20],
|
||||
cluster_keywords=[t for t in subtopics[:20]],
|
||||
total_volume=0,
|
||||
coverage_score=0.0,
|
||||
)
|
||||
clusters.append(cluster)
|
||||
|
||||
clusters.sort(key=lambda c: len(c.cluster_topics), reverse=True)
|
||||
return clusters
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Editorial calendar generation
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def generate_calendar(
|
||||
self,
|
||||
gaps: list[TopicGap],
|
||||
clusters: list[TopicCluster],
|
||||
weeks_ahead: int = 12,
|
||||
entries_per_week: int = 2,
|
||||
) -> list[CalendarEntry]:
|
||||
"""
|
||||
Generate prioritized editorial calendar from gaps and clusters.
|
||||
|
||||
Args:
|
||||
gaps: List of topic gaps (sorted by priority).
|
||||
clusters: List of topic clusters.
|
||||
weeks_ahead: Number of weeks to plan.
|
||||
entries_per_week: Content pieces per week.
|
||||
|
||||
Returns:
|
||||
List of CalendarEntry objects.
|
||||
"""
|
||||
calendar: list[CalendarEntry] = []
|
||||
today = datetime.now()
|
||||
|
||||
# Build cluster lookup
|
||||
topic_to_cluster: dict[str, str] = {}
|
||||
for cluster in clusters:
|
||||
for topic in cluster.cluster_topics:
|
||||
topic_to_cluster[topic] = cluster.pillar_topic
|
||||
topic_to_cluster[cluster.pillar_topic] = cluster.pillar_topic
|
||||
|
||||
# Prioritize: pillar topics first, then by priority score
|
||||
pillar_topics = {c.pillar_topic for c in clusters}
|
||||
pillar_gaps = [g for g in gaps if g.topic in pillar_topics]
|
||||
other_gaps = [g for g in gaps if g.topic not in pillar_topics]
|
||||
ordered_gaps = pillar_gaps + other_gaps
|
||||
|
||||
max_entries = weeks_ahead * entries_per_week
|
||||
week_offset = 0
|
||||
slot_in_week = 0
|
||||
|
||||
for gap in ordered_gaps[:max_entries]:
|
||||
target_date = today + timedelta(weeks=week_offset, days=slot_in_week * 3)
|
||||
|
||||
# Determine priority label
|
||||
if gap.priority_score >= 70:
|
||||
priority = "high"
|
||||
elif gap.priority_score >= 40:
|
||||
priority = "medium"
|
||||
else:
|
||||
priority = "low"
|
||||
|
||||
# Word count based on content type
|
||||
word_count_map = {
|
||||
"guide": 2500,
|
||||
"listicle": 2000,
|
||||
"informational": 1800,
|
||||
"landing": 1200,
|
||||
"blog": 1500,
|
||||
}
|
||||
|
||||
entry = CalendarEntry(
|
||||
topic=gap.topic,
|
||||
priority=priority,
|
||||
target_date=target_date.strftime("%Y-%m-%d"),
|
||||
content_type=gap.content_type_suggestion,
|
||||
target_word_count=word_count_map.get(gap.content_type_suggestion, 1500),
|
||||
primary_keyword=gap.topic,
|
||||
estimated_traffic=gap.estimated_traffic,
|
||||
cluster_name=topic_to_cluster.get(gap.topic, "uncategorized"),
|
||||
)
|
||||
calendar.append(entry)
|
||||
|
||||
slot_in_week += 1
|
||||
if slot_in_week >= entries_per_week:
|
||||
slot_in_week = 0
|
||||
week_offset += 1
|
||||
|
||||
return calendar
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Korean opportunity detection
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def detect_korean_opportunities(gaps: list[TopicGap]) -> list[dict[str, Any]]:
|
||||
"""Detect Korean-market content opportunities in gaps."""
|
||||
opportunities: list[dict[str, Any]] = []
|
||||
|
||||
for gap in gaps:
|
||||
for pattern_info in KOREAN_OPPORTUNITY_PATTERNS:
|
||||
if re.search(pattern_info["pattern"], gap.topic, re.IGNORECASE):
|
||||
opportunities.append({
|
||||
"topic": gap.topic,
|
||||
"pattern": pattern_info["label"],
|
||||
"description": pattern_info["description"],
|
||||
"estimated_traffic": gap.estimated_traffic,
|
||||
"priority_score": gap.priority_score,
|
||||
})
|
||||
break
|
||||
|
||||
opportunities.sort(key=lambda o: o["priority_score"], reverse=True)
|
||||
return opportunities
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Orchestration
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
    async def analyze(
        self,
        target_url: str,
        competitor_urls: list[str],
        build_clusters: bool = False,
    ) -> ContentGapResult:
        """
        Run full content gap analysis.

        Pipeline: (1) topic gaps vs competitors, (2) optional topic clusters,
        (3) editorial calendar, (4) Korean-market opportunities,
        (5) strategic recommendations. Each step populates the shared result
        object in order; later steps read what earlier steps wrote.

        Args:
            target_url: Target website URL.
            competitor_urls: List of competitor URLs.
            build_clusters: Whether to build topic clusters.

        Returns:
            ContentGapResult with gaps, clusters, and calendar.
        """
        result = ContentGapResult(
            target_url=target_url,
            competitor_urls=competitor_urls,
            timestamp=datetime.now().isoformat(),
        )

        self.logger.info(
            f"Starting gap analysis: {target_url} vs {len(competitor_urls)} competitors"
        )

        # 1. Find topic gaps
        gaps, target_keywords, content_volume = await self.find_topic_gaps(
            target_url, competitor_urls
        )

        result.gaps = gaps
        result.target_topics_count = len(target_keywords)
        # Sum only over the requested competitors (target's own count excluded).
        result.competitor_topics_count = sum(content_volume.get(c, 0) for c in competitor_urls)
        result.content_volume_comparison = content_volume

        # 2. Build topic clusters if requested
        if build_clusters and gaps:
            all_topics = [g.topic for g in gaps]
            result.clusters = self.build_topic_clusters(all_topics)

        # 3. Generate editorial calendar
        # result.clusters is [] when clustering was skipped, so the calendar
        # simply has no pillar-first ordering in that case.
        result.calendar = self.generate_calendar(gaps, result.clusters)

        # 4. Detect Korean opportunities
        result.korean_opportunities = self.detect_korean_opportunities(gaps)

        # 5. Recommendations
        # Runs last: it reads gaps, clusters, and korean_opportunities.
        result.recommendations = self._generate_recommendations(result)

        self.logger.info(
            f"Gap analysis complete: {len(gaps)} gaps, "
            f"{len(result.clusters)} clusters, "
            f"{len(result.calendar)} calendar entries"
        )

        return result
|
||||
|
||||
@staticmethod
|
||||
def _generate_recommendations(result: ContentGapResult) -> list[str]:
|
||||
"""Generate strategic recommendations from gap analysis."""
|
||||
recs: list[str] = []
|
||||
|
||||
gap_count = len(result.gaps)
|
||||
if gap_count > 50:
|
||||
recs.append(
|
||||
f"경쟁사 대비 {gap_count}개의 콘텐츠 격차가 발견되었습니다. "
|
||||
"우선순위 상위 20개 주제부터 콘텐츠 생성을 시작하세요."
|
||||
)
|
||||
elif gap_count > 20:
|
||||
recs.append(
|
||||
f"{gap_count}개의 콘텐츠 격차가 있습니다. "
|
||||
"높은 트래픽 기회부터 순차적으로 콘텐츠를 생성하세요."
|
||||
)
|
||||
elif gap_count > 0:
|
||||
recs.append(
|
||||
f"{gap_count}개의 콘텐츠 격차가 발견되었습니다. "
|
||||
"비교적 적은 격차이므로 빠른 시일 내 모두 커버할 수 있습니다."
|
||||
)
|
||||
|
||||
if result.clusters:
|
||||
recs.append(
|
||||
f"{len(result.clusters)}개의 토픽 클러스터를 구성했습니다. "
|
||||
"필러 콘텐츠부터 작성하여 내부 링크 구조를 강화하세요."
|
||||
)
|
||||
|
||||
if result.korean_opportunities:
|
||||
recs.append(
|
||||
f"한국어 시장 기회가 {len(result.korean_opportunities)}개 발견되었습니다. "
|
||||
"후기, 비용, 비교 콘텐츠는 한국 검색 시장에서 높은 전환율을 보입니다."
|
||||
)
|
||||
|
||||
high_priority = [g for g in result.gaps if g.priority_score >= 70]
|
||||
if high_priority:
|
||||
top_topics = ", ".join(g.topic for g in high_priority[:3])
|
||||
recs.append(
|
||||
f"최우선 주제: {top_topics}. "
|
||||
"이 주제들은 높은 트래픽 잠재력과 경쟁사 커버리지를 가지고 있습니다."
|
||||
)
|
||||
|
||||
if not recs:
|
||||
recs.append("경쟁사 대비 콘텐츠 커버리지가 양호합니다. 기존 콘텐츠 최적화에 집중하세요.")
|
||||
|
||||
return recs
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Construct the command-line interface for the content gap analyzer."""
    parser = argparse.ArgumentParser(
        description="SEO Content Gap Analyzer - topic gaps, clusters, calendar",
    )
    parser.add_argument("--target", required=True, help="Target website URL")
    parser.add_argument(
        "--competitor",
        dest="competitors",
        action="append",
        required=True,
        help="Competitor URL (can be repeated)",
    )
    # Simple boolean toggles share the same shape; declare them in one pass.
    for flag, help_text in (
        ("--clusters", "Build topic clusters"),
        ("--json", "Output as JSON"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    parser.add_argument("--output", help="Save output to file")
    return parser
|
||||
|
||||
|
||||
def format_text_report(result: ContentGapResult) -> str:
    """Render a gap analysis result as a human-readable markdown-style report."""
    out: list[str] = []
    emit = out.append

    # Header block.
    emit(f"## Content Gap Analysis: {result.target_url}")
    emit(f"**Date**: {result.timestamp[:10]}")
    emit(f"**Competitors**: {', '.join(result.competitor_urls)}")
    emit("")

    # Per-site topic counts.
    emit("### Content Volume Comparison")
    out.extend(f" - {site}: {count} topics" for site, count in result.content_volume_comparison.items())
    emit("")

    # Top 20 gaps, already sorted by priority.
    emit(f"### Topic Gaps ({len(result.gaps)} found)")
    for idx, gap in enumerate(result.gaps[:20], 1):
        emit(
            f" {idx}. [{gap.priority_score:.0f}] {gap.topic} "
            f"(traffic: {gap.estimated_traffic}, difficulty: {gap.difficulty})"
        )
    emit("")

    if result.clusters:
        emit(f"### Topic Clusters ({len(result.clusters)})")
        for idx, cluster in enumerate(result.clusters, 1):
            emit(f" {idx}. **{cluster.pillar_topic}** ({len(cluster.cluster_topics)} subtopics)")
            out.extend(f" - {sub}" for sub in cluster.cluster_topics[:5])
        emit("")

    if result.calendar:
        emit(f"### Editorial Calendar ({len(result.calendar)} entries)")
        for entry in result.calendar[:15]:
            emit(
                f" - [{entry.target_date}] {entry.topic} "
                f"({entry.content_type}, {entry.target_word_count}w, priority: {entry.priority})"
            )
        emit("")

    if result.korean_opportunities:
        emit(f"### Korean Market Opportunities ({len(result.korean_opportunities)})")
        out.extend(f" - {opp['topic']} ({opp['description']})" for opp in result.korean_opportunities[:10])
        emit("")

    emit("### Recommendations")
    out.extend(f" {idx}. {rec}" for idx, rec in enumerate(result.recommendations, 1))

    return "\n".join(out)
|
||||
|
||||
|
||||
async def main() -> None:
    """CLI entry point: parse arguments, run the analysis, emit the report."""
    args = build_parser().parse_args()

    analyzer = ContentGapAnalyzer()
    result = await analyzer.analyze(
        target_url=args.target,
        competitor_urls=args.competitors,
        build_clusters=args.clusters,
    )

    # Choose the rendering: machine-readable JSON or the text report.
    rendered = (
        json.dumps(asdict(result), ensure_ascii=False, indent=2, default=str)
        if args.json
        else format_text_report(result)
    )

    if args.output:
        with open(args.output, "w", encoding="utf-8") as fh:
            fh.write(rendered)
        logger.info(f"Output saved to {args.output}")
    else:
        print(rendered)

    analyzer.print_stats()
|
||||
|
||||
|
||||
# Script entry point: drive the async CLI with a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user