12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
739 lines · 28 KiB · Python
"""
|
|
Content Brief Generator - SEO Content Brief Creation
|
|
=====================================================
|
|
Purpose: Generate detailed SEO content briefs with outlines,
|
|
keyword lists, word count targets, and internal linking suggestions.
|
|
Python: 3.10+
|
|
"""
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import math
|
|
import re
|
|
import sys
|
|
from dataclasses import asdict, dataclass, field
|
|
from datetime import datetime
|
|
from typing import Any
|
|
from urllib.parse import urlparse
|
|
|
|
import aiohttp
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
from base_client import BaseAsyncClient, config
|
|
|
|
# Module-level logger for the CLI helpers; the generator class logs via
# self.logger (provided by BaseAsyncClient — confirm against base_client).
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data classes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@dataclass
class OutlineSection:
    """A single heading section in the content outline."""

    # Heading text as it should appear in the article.
    heading: str
    # Heading depth within the outline.
    level: int = 2  # H2 or H3
    # Bullet points (e.g. H3 subtopics) the writer should cover here.
    talking_points: list[str] = field(default_factory=list)
    # Suggested word budget for this section.
    target_words: int = 200
    # Keywords to work into this section's copy (not populated by
    # extract_outline in this module — reserved for downstream use).
    keywords_to_include: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class CompetitorPageAnalysis:
    """Analysis of a single competitor page for the target keyword."""

    # Page URL from the SERP data.
    url: str
    # Page <title> (or SERP-provided title, if any).
    title: str = ""
    # Count of whitespace-separated tokens in the page's visible text.
    word_count: int = 0
    # Extracted headings as {"level": "H2", "text": ...} dicts.
    headings: list[dict[str, str]] = field(default_factory=list)
    # H2 heading texts, used as a proxy for topics covered (max 15).
    topics_covered: list[str] = field(default_factory=list)
    # Free-form content type label (not populated in this module).
    content_type: str = ""
    # True when the page has more than two <img> tags.
    has_images: bool = False
    # True when a <video> or a YouTube/Vimeo <iframe> is present.
    has_video: bool = False
    # True when FAQ text or FAQPage JSON-LD schema is detected.
    has_faq: bool = False
    # True when at least one <table> is present.
    has_table: bool = False
|
|
|
|
|
|
@dataclass
class ContentBrief:
    """Complete SEO content brief."""

    # Primary target keyword the brief is built around.
    primary_keyword: str
    # Close keyword variants from Ahrefs matching terms.
    secondary_keywords: list[str] = field(default_factory=list)
    # Semantically related (LSI) keywords from Ahrefs related terms.
    lsi_keywords: list[str] = field(default_factory=list)
    # Recommended article length (words).
    target_word_count: int = 1500
    # Acceptable (min, max) word count window (+/- 20% of target).
    word_count_range: tuple[int, int] = (1200, 1800)
    # Title suggestion generated from the intent template.
    suggested_title: str = ""
    # Meta description suggestion generated from the intent template.
    meta_description: str = ""
    # Recommended H2/H3 outline.
    outline: list[OutlineSection] = field(default_factory=list)
    # Per-page competitor analyses backing the recommendations.
    competitor_analysis: list[CompetitorPageAnalysis] = field(default_factory=list)
    # Internal link suggestions: url / anchor_text / relevance / current_traffic.
    internal_links: list[dict[str, str]] = field(default_factory=list)
    # Suggested content format (blog, guide, listicle, landing, ...).
    content_format: str = "blog"
    # Korean-market formatting tips keyed off the detected intent.
    korean_format_recommendations: list[str] = field(default_factory=list)
    # Detected search intent (informational/commercial/transactional/navigational).
    search_intent: str = "informational"
    # Free-form notes (not populated by generate()).
    notes: list[str] = field(default_factory=list)
    # ISO-8601 generation timestamp.
    timestamp: str = ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Search intent patterns
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Regex fragments used to classify a keyword's search intent.
# Each intent maps to English and Korean trigger patterns; matching is a
# case-insensitive substring search (see detect_search_intent).
INTENT_PATTERNS = {
    "transactional": [
        r"buy", r"purchase", r"price", r"cost", r"order", r"shop",
        # Korean: purchase, order, price, cost, discount, coupon
        r"구매", r"주문", r"가격", r"비용", r"할인", r"쿠폰",
    ],
    "navigational": [
        r"login", r"sign in", r"official", r"website",
        # Korean: login, official, homepage
        r"로그인", r"공식", r"홈페이지",
    ],
    "commercial": [
        r"best", r"top", r"review", r"compare", r"vs",
        # Korean: recommendation, comparison, review, review, ranking
        r"추천", r"비교", r"후기", r"리뷰", r"순위",
    ],
    "informational": [
        r"what", r"how", r"why", r"guide", r"tutorial",
        # Korean: "what is", method/how-to, guide, effect, cause
        r"이란", r"방법", r"가이드", r"효과", r"원인",
    ],
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Korean content format recommendations
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Korean-market content formatting recommendations, keyed by search intent.
# The strings are writer-facing guidance (kept in Korean on purpose);
# English glosses are given in the comments.
KOREAN_FORMAT_TIPS = {
    "transactional": [
        # Include a price comparison table (vs. competitor prices)
        "가격 비교표를 포함하세요 (경쟁사 가격 대비)",
        # Present at least 3 real cost examples
        "실제 비용 사례를 3개 이상 제시하세요",
        # Clearly explain payment methods and discount information
        "결제 방법 및 할인 정보를 명확히 안내하세요",
        # Place CTA buttons in multiple locations
        "CTA(행동 유도) 버튼을 여러 위치에 배치하세요",
    ],
    "commercial": [
        # Use an honest, Naver-blog-style review tone
        "네이버 블로그 스타일의 솔직한 후기 톤을 사용하세요",
        # Compare pros and cons in a balanced way
        "장단점을 균형 있게 비교하세요",
        # Include real usage photos or before/after images
        "실제 사용 사진 또는 전후 비교 이미지를 포함하세요",
        # Add a star or score rating system
        "별점 또는 점수 평가 체계를 추가하세요",
        # Include an FAQ section (helps Naver search visibility)
        "FAQ 섹션을 포함하세요 (네이버 검색 노출에 유리)",
    ],
    "informational": [
        # Summarize key information at the top (conclusion-first structure)
        "핵심 정보를 글 상단에 요약하세요 (두괄식 구성)",
        # Pair technical terms with plain-language explanations
        "전문 용어는 쉬운 설명을 병기하세요",
        # Use infographics or charts
        "인포그래픽 또는 도표를 활용하세요",
        # Include 3-5 internal links to related content
        "관련 콘텐츠 내부 링크를 3-5개 포함하세요",
        # Cite experts or sources (strengthens E-E-A-T)
        "전문가 인용 또는 출처를 명시하세요 (E-E-A-T 강화)",
    ],
    "navigational": [
        # Put official information and contact details at the very top
        "공식 정보와 연락처를 최상단에 배치하세요",
        # Embed a map (Naver Maps / Google Maps)
        "지도 임베드를 포함하세요 (네이버 지도/구글 맵)",
        # Clearly display business hours, address, and phone number
        "영업시간, 주소, 전화번호를 명확히 표시하세요",
    ],
}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ContentBriefGenerator
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class ContentBriefGenerator(BaseAsyncClient):
    """Generate comprehensive SEO content briefs.

    Pulls SERP and keyword data from the Ahrefs v3 API (when
    ``AHREFS_API_KEY`` is configured), fetches competitor pages with
    aiohttp, and assembles everything into a :class:`ContentBrief`.
    Every external lookup degrades gracefully: failures are logged and
    the brief is built from whatever partial data is available.
    """

    def __init__(self, max_concurrent: int = 5, requests_per_second: float = 2.0):
        super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
        # Shared aiohttp session, created lazily by _ensure_session().
        self.session: aiohttp.ClientSession | None = None

    async def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the shared aiohttp session, (re)creating it if needed."""
        if self.session is None or self.session.closed:
            timeout = aiohttp.ClientTimeout(total=30)
            headers = {
                "User-Agent": "Mozilla/5.0 (compatible; SEOContentBrief/1.0)",
            }
            self.session = aiohttp.ClientSession(timeout=timeout, headers=headers)
        return self.session

    async def close(self) -> None:
        """Close the shared aiohttp session if it is open."""
        if self.session and not self.session.closed:
            await self.session.close()

    @staticmethod
    def _api_key() -> str | None:
        """Return the Ahrefs API key, or None when config has no get_required.

        NOTE: config.get_required may raise when the key is missing
        (depends on base_client — confirm); callers invoke this inside
        try/except blocks that log and continue.
        """
        return config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None

    # ------------------------------------------------------------------
    # Analyze top ranking results
    # ------------------------------------------------------------------

    async def analyze_top_results(
        self,
        keyword: str,
        site_url: str | None = None,
        num_competitors: int = 5,
    ) -> list[CompetitorPageAnalysis]:
        """
        Analyze top ranking pages for a keyword using Ahrefs SERP data.

        Args:
            keyword: Search keyword whose SERP should be analyzed.
            site_url: Currently unused; kept for signature compatibility
                with generate() — TODO confirm intended use.
            num_competitors: Maximum number of SERP results to analyze.

        Returns:
            One CompetitorPageAnalysis per discovered SERP result; pages
            that could not be fetched keep their default (empty) metrics.
        """
        self.logger.info(f"Analyzing top results for: {keyword}")
        results: list[CompetitorPageAnalysis] = []

        # Try Ahrefs SERP overview to discover the ranking URLs.
        try:
            api_key = self._api_key()
            if api_key:
                # requests is synchronous; run it in a worker thread so the
                # event loop is not blocked (the original called it inline,
                # which also serialized the asyncio.gather in generate()).
                resp = await asyncio.to_thread(
                    requests.get,
                    "https://api.ahrefs.com/v3/serp-overview",
                    params={"keyword": keyword, "select": "url,title,position,traffic"},
                    headers={"Authorization": f"Bearer {api_key}"},
                    timeout=30,
                )
                if resp.status_code == 200:
                    data = resp.json()
                    serp_items = data.get("positions", data.get("items", []))[:num_competitors]
                    for item in serp_items:
                        results.append(
                            CompetitorPageAnalysis(
                                url=item.get("url", ""),
                                title=item.get("title", ""),
                            )
                        )
        except Exception as exc:
            self.logger.warning(f"Ahrefs SERP lookup failed: {exc}")

        # Fetch each ranking page and extract on-page metrics.
        session = await self._ensure_session()
        for analysis in results[:num_competitors]:
            if not analysis.url:
                continue
            try:
                async with session.get(analysis.url) as resp:
                    if resp.status != 200:
                        continue
                    html = await resp.text()
                    self._analyze_page_content(analysis, html)
            except Exception as exc:
                self.logger.debug(f"Failed to fetch {analysis.url}: {exc}")

        self.logger.info(f"Analyzed {len(results)} competitor pages")
        return results

    @staticmethod
    def _analyze_page_content(analysis: CompetitorPageAnalysis, html: str) -> None:
        """Parse a competitor page's HTML and populate content metrics in place."""
        soup = BeautifulSoup(html, "html.parser")

        # Title (keep the SERP-provided title when we already have one).
        title_tag = soup.find("title")
        if title_tag and not analysis.title:
            analysis.title = title_tag.get_text(strip=True)

        # Word count over visible text only: strip boilerplate containers first.
        for tag in soup(["script", "style", "nav", "header", "footer"]):
            tag.decompose()
        visible_text = soup.get_text(separator=" ", strip=True)
        analysis.word_count = len(visible_text.split())

        # Headings in DOCUMENT order. (Fix: the original iterated level by
        # level, yielding all H2s before all H3s, which broke the
        # "H3 under its preceding H2" grouping in extract_outline.)
        headings: list[dict[str, str]] = []
        for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
            text = h.get_text(strip=True)
            if text:
                headings.append({"level": h.name.upper(), "text": text})
        analysis.headings = headings

        # Content features used for format recommendations.
        analysis.has_images = len(soup.find_all("img")) > 2
        analysis.has_video = bool(soup.find("video") or soup.find("iframe", src=re.compile(r"youtube|vimeo")))
        analysis.has_faq = bool(
            soup.find(string=re.compile(r"FAQ|자주\s*묻는\s*질문|Q\s*&\s*A", re.IGNORECASE))
            or soup.find("script", type="application/ld+json", string=re.compile(r"FAQPage"))
        )
        analysis.has_table = bool(soup.find("table"))

        # Topic coverage approximated by H2 headings (up to 15).
        analysis.topics_covered = [
            h["text"] for h in headings if h["level"] == "H2"
        ][:15]

    # ------------------------------------------------------------------
    # Extract content outline
    # ------------------------------------------------------------------

    def extract_outline(
        self,
        keyword: str,
        top_results: list[CompetitorPageAnalysis],
    ) -> list[OutlineSection]:
        """
        Build recommended H2/H3 outline by aggregating competitor headings.

        Counts how many competitors cover each H2 topic, orders topics by
        that frequency, attaches de-duplicated H3 subtopics as talking
        points, and appends a FAQ section when at least two competitors
        have one.

        Args:
            keyword: Primary keyword (used only in the FAQ talking points).
            top_results: Competitor analyses with populated headings.

        Returns:
            Ordered outline of up to 8 H2 sections plus an optional FAQ.
        """
        # Count H2 occurrences and group H3s under their preceding H2.
        h2_topics: dict[str, int] = {}
        h3_by_h2: dict[str, list[str]] = {}

        for result in top_results:
            current_h2 = ""
            for heading in result.headings:
                text = heading["text"].strip()
                if heading["level"] == "H2":
                    current_h2 = text
                    h2_topics[text] = h2_topics.get(text, 0) + 1
                elif heading["level"] == "H3" and current_h2:
                    h3_by_h2.setdefault(current_h2, []).append(text)

        # Sort H2s by frequency (most common topics first).
        sorted_h2s = sorted(h2_topics.items(), key=lambda x: x[1], reverse=True)

        outline: list[OutlineSection] = []
        target_word_count = self.calculate_word_count(top_results)
        # Divisor floor of 5 keeps short outlines from getting huge budgets.
        words_per_section = target_word_count // max(len(sorted_h2s), 5)

        for h2_text, _frequency in sorted_h2s[:8]:
            section = OutlineSection(
                heading=h2_text,
                level=2,
                target_words=words_per_section,
                talking_points=[],
            )
            # Attach up to 5 unique H3 subtopics as talking points.
            # (Fix: the original also built OutlineSection objects for
            # each H3 and discarded them — dead code removed.)
            if h2_text in h3_by_h2:
                section.talking_points.extend(list(dict.fromkeys(h3_by_h2[h2_text]))[:5])
            outline.append(section)

        # Ensure a FAQ section when it is a common competitor feature.
        faq_count = sum(1 for r in top_results if r.has_faq)
        if faq_count >= 2 and not any("FAQ" in s.heading or "질문" in s.heading for s in outline):
            outline.append(OutlineSection(
                heading="자주 묻는 질문 (FAQ)",
                level=2,
                target_words=300,
                talking_points=[
                    f"{keyword} 관련 자주 묻는 질문 5-7개",
                    "Schema markup (FAQPage) 적용 권장",
                ],
            ))

        return outline

    # ------------------------------------------------------------------
    # Keyword suggestions
    # ------------------------------------------------------------------

    async def suggest_keywords(self, primary_keyword: str) -> dict[str, list[str]]:
        """
        Generate primary, secondary, and LSI keyword suggestions.

        Uses the Ahrefs matching-terms endpoint for secondary keywords and
        the related-terms endpoint for LSI keywords. Returns only the
        primary keyword when no API key is configured or a lookup fails.

        Returns:
            Dict with "primary", "secondary", and "lsi" keyword lists.
        """
        self.logger.info(f"Generating keyword suggestions for: {primary_keyword}")
        result: dict[str, list[str]] = {
            "primary": [primary_keyword],
            "secondary": [],
            "lsi": [],
        }

        try:
            api_key = self._api_key()
            if not api_key:
                self.logger.warning("AHREFS_API_KEY not set; returning basic keywords only")
                return result

            # Matching terms -> secondary keywords (run blocking HTTP in a
            # worker thread to keep the event loop responsive).
            resp = await asyncio.to_thread(
                requests.get,
                "https://api.ahrefs.com/v3/keywords-explorer/matching-terms",
                params={"keyword": primary_keyword, "limit": 20, "select": "keyword,volume,difficulty"},
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=30,
            )
            if resp.status_code == 200:
                data = resp.json()
                for term in data.get("keywords", data.get("items", [])):
                    kw = term.get("keyword", "")
                    if kw and kw.lower() != primary_keyword.lower():
                        result["secondary"].append(kw)

            # Related terms -> LSI keywords (skip duplicates of secondary).
            resp2 = await asyncio.to_thread(
                requests.get,
                "https://api.ahrefs.com/v3/keywords-explorer/related-terms",
                params={"keyword": primary_keyword, "limit": 15, "select": "keyword,volume"},
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=30,
            )
            if resp2.status_code == 200:
                data2 = resp2.json()
                for term in data2.get("keywords", data2.get("items", [])):
                    kw = term.get("keyword", "")
                    if kw and kw not in result["secondary"]:
                        result["lsi"].append(kw)

        except Exception as exc:
            self.logger.warning(f"Keyword suggestion lookup failed: {exc}")

        return result

    # ------------------------------------------------------------------
    # Word count calculation
    # ------------------------------------------------------------------

    @staticmethod
    def calculate_word_count(top_results: list[CompetitorPageAnalysis]) -> int:
        """
        Calculate target word count based on top 5 ranking pages.

        Averages the word counts of up to five analyzed pages (ignoring
        pages with <= 100 words as likely fetch failures), rounds to the
        nearest 100, and clamps into [800, 5000]. Falls back to 1500
        when no usable page data is available.
        """
        word_counts = [r.word_count for r in top_results[:5] if r.word_count > 100]

        if not word_counts:
            return 1500  # Default fallback

        avg = sum(word_counts) / len(word_counts)
        # Round to nearest 100, then clamp to a sane editorial range.
        target = round(avg / 100) * 100
        return max(800, min(5000, target))

    # ------------------------------------------------------------------
    # Internal linking suggestions
    # ------------------------------------------------------------------

    async def suggest_internal_links(
        self,
        keyword: str,
        site_url: str,
    ) -> list[dict[str, str]]:
        """
        Find related existing pages on the site for internal linking.

        Uses Ahrefs organic keywords for the site, then keeps pages whose
        ranking keyword shares at least one word with the target keyword.

        Args:
            keyword: Target keyword of the new content.
            site_url: Site URL (netloc is used as the Ahrefs target).

        Returns:
            Up to 10 link suggestions sorted by current traffic, each with
            url / anchor_text / relevance / current_traffic keys.
        """
        self.logger.info(f"Finding internal link opportunities for {keyword} on {site_url}")
        links: list[dict[str, str]] = []
        target = urlparse(site_url).netloc or site_url

        try:
            api_key = self._api_key()
            if not api_key:
                return links

            # Blocking HTTP in a worker thread (see analyze_top_results).
            resp = await asyncio.to_thread(
                requests.get,
                "https://api.ahrefs.com/v3/site-explorer/organic-keywords",
                params={
                    "target": target,
                    "limit": 50,
                    "select": "keyword,url,position,traffic",
                },
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=30,
            )
            if resp.status_code != 200:
                return links

            data = resp.json()
            keywords_data = data.get("keywords", data.get("items", []))

            # Relevance heuristic: word overlap between the target keyword
            # and each page's ranking keyword.
            keyword_lower = keyword.lower()
            keyword_words = set(keyword_lower.split())

            seen_urls: set[str] = set()
            for item in keywords_data:
                kw = item.get("keyword", "").lower()
                url = item.get("url", "")

                if not url or url in seen_urls:
                    continue

                kw_words = set(kw.split())
                overlap = keyword_words & kw_words
                # Skip pages ranking for the exact same keyword: they would
                # compete with the new content rather than support it.
                if overlap and kw != keyword_lower:
                    links.append({
                        "url": url,
                        "anchor_text": kw,
                        "relevance": f"{len(overlap)}/{len(keyword_words)} word overlap",
                        "current_traffic": str(item.get("traffic", 0)),
                    })
                    seen_urls.add(url)

            links.sort(key=lambda l: int(l.get("current_traffic", "0")), reverse=True)

        except Exception as exc:
            self.logger.warning(f"Internal link suggestion failed: {exc}")

        return links[:10]

    # ------------------------------------------------------------------
    # Search intent detection
    # ------------------------------------------------------------------

    @staticmethod
    def detect_search_intent(keyword: str) -> str:
        """Classify keyword search intent.

        Scores the keyword against INTENT_PATTERNS (case-insensitive
        substring match) and returns the highest-scoring intent,
        defaulting to "informational" when nothing matches.
        """
        keyword_lower = keyword.lower()
        scores: dict[str, int] = {}

        for intent, patterns in INTENT_PATTERNS.items():
            score = sum(1 for p in patterns if re.search(p, keyword_lower, re.IGNORECASE))
            if score > 0:
                scores[intent] = score

        if not scores:
            return "informational"
        return max(scores, key=scores.get)

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    async def generate(
        self,
        keyword: str,
        site_url: str,
        num_competitors: int = 5,
    ) -> ContentBrief:
        """
        Generate a comprehensive SEO content brief.

        Args:
            keyword: Primary target keyword.
            site_url: Target website URL.
            num_competitors: Number of competitor pages to analyze.

        Returns:
            ContentBrief with outline, keywords, and recommendations.
        """
        self.logger.info(f"Generating content brief for: {keyword}")

        # Detect search intent (drives title/meta templates and format tips).
        intent = self.detect_search_intent(keyword)

        # Run the three independent analyses concurrently.
        top_results, keyword_data, internal_links = await asyncio.gather(
            self.analyze_top_results(keyword, site_url, num_competitors),
            self.suggest_keywords(keyword),
            self.suggest_internal_links(keyword, site_url),
        )

        # Word count target with a +/- 20% acceptable window.
        target_word_count = self.calculate_word_count(top_results)
        word_count_min = int(target_word_count * 0.8)
        word_count_max = int(target_word_count * 1.2)

        outline = self.extract_outline(keyword, top_results)
        suggested_title = self._generate_title(keyword, intent)
        meta_description = self._generate_meta_description(keyword, intent)
        korean_tips = KOREAN_FORMAT_TIPS.get(intent, KOREAN_FORMAT_TIPS["informational"])

        brief = ContentBrief(
            primary_keyword=keyword,
            secondary_keywords=keyword_data.get("secondary", [])[:10],
            lsi_keywords=keyword_data.get("lsi", [])[:10],
            target_word_count=target_word_count,
            word_count_range=(word_count_min, word_count_max),
            suggested_title=suggested_title,
            meta_description=meta_description,
            outline=outline,
            competitor_analysis=top_results,
            internal_links=internal_links,
            content_format=self._suggest_format(intent, top_results),
            korean_format_recommendations=korean_tips,
            search_intent=intent,
            timestamp=datetime.now().isoformat(),
        )

        self.logger.info(
            f"Brief generated: {len(outline)} sections, "
            f"{target_word_count} target words, "
            f"{len(keyword_data.get('secondary', []))} secondary keywords"
        )

        return brief

    @staticmethod
    def _generate_title(keyword: str, intent: str) -> str:
        """Generate a suggested title from an intent-specific template."""
        templates = {
            "informational": "{keyword} - 완벽 가이드 (2025년 최신)",
            "commercial": "{keyword} 추천 TOP 10 비교 (전문가 리뷰)",
            "transactional": "{keyword} 가격 비교 및 구매 가이드",
            "navigational": "{keyword} - 공식 안내",
        }
        template = templates.get(intent, templates["informational"])
        return template.format(keyword=keyword)

    @staticmethod
    def _generate_meta_description(keyword: str, intent: str) -> str:
        """Generate a suggested meta description from an intent template."""
        templates = {
            "informational": (
                f"{keyword}에 대해 알아야 할 모든 것을 정리했습니다. "
                "전문가가 알려주는 핵심 정보와 실용적인 가이드를 확인하세요."
            ),
            "commercial": (
                f"{keyword} 비교 분석! 장단점, 가격, 실제 후기를 "
                "한눈에 비교하고 최적의 선택을 하세요."
            ),
            "transactional": (
                f"{keyword} 최저가 비교 및 구매 방법을 안내합니다. "
                "합리적인 가격으로 구매하는 팁을 확인하세요."
            ),
            "navigational": (
                f"{keyword} 공식 정보 및 이용 안내. "
                "정확한 정보를 빠르게 확인하세요."
            ),
        }
        return templates.get(intent, templates["informational"])

    @staticmethod
    def _suggest_format(intent: str, results: list[CompetitorPageAnalysis]) -> str:
        """Suggest content format based on intent and competitor analysis."""
        if intent == "commercial":
            return "listicle"
        if intent == "informational":
            return "guide"
        if intent == "transactional":
            return "landing"

        # Navigational (or unknown) intent: infer from competitor depth.
        avg_word_count = (
            sum(r.word_count for r in results) / len(results) if results else 0
        )
        if avg_word_count > 3000:
            return "comprehensive_guide"
        return "blog"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def build_parser() -> argparse.ArgumentParser:
    """Construct the command-line parser for the brief generator."""
    p = argparse.ArgumentParser(description="SEO Content Brief Generator")
    p.add_argument("--keyword", required=True, help="Primary target keyword")
    p.add_argument("--url", required=True, help="Target website URL")
    p.add_argument(
        "--competitors",
        type=int,
        default=5,
        help="Number of competitor pages to analyze (default: 5)",
    )
    p.add_argument("--json", action="store_true", help="Output as JSON")
    p.add_argument("--output", help="Save output to file")
    return p
|
|
|
|
|
|
def format_text_report(brief: ContentBrief) -> str:
    """Render a ContentBrief as a markdown-flavoured text report."""
    out: list[str] = [
        f"## Content Brief: {brief.primary_keyword}",
        f"**Date**: {brief.timestamp[:10]}",
        f"**Search Intent**: {brief.search_intent}",
        f"**Content Format**: {brief.content_format}",
        "",
        "### Target Metrics",
        f"- Word count: {brief.target_word_count} ({brief.word_count_range[0]}-{brief.word_count_range[1]})",
        f"- Suggested title: {brief.suggested_title}",
        f"- Meta description: {brief.meta_description}",
        "",
        "### Keywords",
        f"- **Primary**: {brief.primary_keyword}",
    ]
    if brief.secondary_keywords:
        out.append(f"- **Secondary**: {', '.join(brief.secondary_keywords[:8])}")
    if brief.lsi_keywords:
        out.append(f"- **LSI**: {', '.join(brief.lsi_keywords[:8])}")
    out.append("")

    # Outline: H2s at two-space indent, H3s marked deeper.
    out.append("### Content Outline")
    for sec in brief.outline:
        marker = "##" if sec.level == 2 else "###"
        out.append(f"  {marker} {sec.heading} (~{sec.target_words}w)")
        out.extend(f"    - {point}" for point in sec.talking_points)
    out.append("")

    if brief.competitor_analysis:
        out.append(f"### Competitor Analysis ({len(brief.competitor_analysis)} pages)")
        for page in brief.competitor_analysis:
            out.append(f"  - **{page.title or page.url}**")
            out.append(f"    Word count: {page.word_count} | Headings: {len(page.headings)}")
            flags = [
                label
                for present, label in (
                    (page.has_images, "images"),
                    (page.has_video, "video"),
                    (page.has_faq, "FAQ"),
                    (page.has_table, "table"),
                )
                if present
            ]
            if flags:
                out.append(f"    Features: {', '.join(flags)}")
        out.append("")

    if brief.internal_links:
        out.append(f"### Internal Linking Suggestions ({len(brief.internal_links)})")
        for link in brief.internal_links[:7]:
            out.append(f"  - [{link['anchor_text']}]({link['url']})")
        out.append("")

    if brief.korean_format_recommendations:
        out.append("### Korean Content Format Recommendations")
        out.extend(f"  - {tip}" for tip in brief.korean_format_recommendations)

    return "\n".join(out)
|
|
|
|
|
|
async def main() -> None:
    """CLI entry point: parse arguments, generate a brief, emit the report."""
    args = build_parser().parse_args()

    generator = ContentBriefGenerator()
    try:
        brief = await generator.generate(
            keyword=args.keyword,
            site_url=args.url,
            num_competitors=args.competitors,
        )

        # Render either machine-readable JSON or the text report.
        if args.json:
            rendered = json.dumps(asdict(brief), ensure_ascii=False, indent=2, default=str)
        else:
            rendered = format_text_report(brief)

        # Write to the requested file, or print to stdout.
        if args.output:
            with open(args.output, "w", encoding="utf-8") as fh:
                fh.write(rendered)
            logger.info(f"Output saved to {args.output}")
        else:
            print(rendered)

    finally:
        await generator.close()
        generator.print_stats()
|
|
|
|
|
|
# Script entry point: run the async CLI under a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|