"""
Content Brief Generator - SEO Content Brief Creation
=====================================================
Purpose: Generate detailed SEO content briefs with outlines,
         keyword lists, word count targets, and internal linking suggestions.
Python: 3.10+
"""

import argparse
import asyncio
import json
import logging
import math
import re
import sys
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Any
from urllib.parse import urlparse

import aiohttp
import requests
from bs4 import BeautifulSoup

from base_client import BaseAsyncClient, config

logger = logging.getLogger(__name__)

# Base URL shared by every Ahrefs v3 endpoint queried in this module.
AHREFS_API_BASE = "https://api.ahrefs.com/v3"

# Outline and heading limits used when aggregating competitor headings.
MAX_OUTLINE_SECTIONS = 8
MIN_SECTION_DIVISOR = 5
MAX_SUBTOPICS_PER_SECTION = 5


# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------

@dataclass
class OutlineSection:
    """A single heading section in the content outline."""

    heading: str
    level: int = 2  # H2 or H3
    talking_points: list[str] = field(default_factory=list)
    target_words: int = 200
    keywords_to_include: list[str] = field(default_factory=list)


@dataclass
class CompetitorPageAnalysis:
    """Analysis of a single competitor page for the target keyword."""

    url: str
    title: str = ""
    word_count: int = 0
    # Each entry is {"level": "H1".."H6", "text": <heading text>} in document order.
    headings: list[dict[str, str]] = field(default_factory=list)
    topics_covered: list[str] = field(default_factory=list)
    content_type: str = ""
    has_images: bool = False
    has_video: bool = False
    has_faq: bool = False
    has_table: bool = False


@dataclass
class ContentBrief:
    """Complete SEO content brief."""

    primary_keyword: str
    secondary_keywords: list[str] = field(default_factory=list)
    lsi_keywords: list[str] = field(default_factory=list)
    target_word_count: int = 1500
    word_count_range: tuple[int, int] = (1200, 1800)
    suggested_title: str = ""
    meta_description: str = ""
    outline: list[OutlineSection] = field(default_factory=list)
    competitor_analysis: list[CompetitorPageAnalysis] = field(default_factory=list)
    internal_links: list[dict[str, str]] = field(default_factory=list)
    content_format: str = "blog"
    korean_format_recommendations: list[str] = field(default_factory=list)
    search_intent: str = "informational"
    notes: list[str] = field(default_factory=list)
    timestamp: str = ""


# ---------------------------------------------------------------------------
# Search intent patterns
# ---------------------------------------------------------------------------

INTENT_PATTERNS = {
    "transactional": [
        r"buy", r"purchase", r"price", r"cost", r"order", r"shop",
        r"구매", r"주문", r"가격", r"비용", r"할인", r"쿠폰",
    ],
    "navigational": [
        r"login", r"sign in", r"official", r"website",
        r"로그인", r"공식", r"홈페이지",
    ],
    "commercial": [
        r"best", r"top", r"review", r"compare", r"vs",
        r"추천", r"비교", r"후기", r"리뷰", r"순위",
    ],
    "informational": [
        r"what", r"how", r"why", r"guide", r"tutorial",
        r"이란", r"방법", r"가이드", r"효과", r"원인",
    ],
}


def _anchor_intent_pattern(pattern: str) -> str:
    """Return *pattern* anchored at a leading word boundary when it is ASCII.

    Unanchored English patterns match inside unrelated words ("laptop"
    contains "top", "shower" contains "how", "border" contains "order").
    Korean patterns are left as plain substring matches because they are
    commonly attached to the preceding word without a separator.
    """
    if pattern.isascii():
        return rf"\b(?:{pattern})"
    return pattern


# ---------------------------------------------------------------------------
# Korean content format recommendations
# ---------------------------------------------------------------------------

KOREAN_FORMAT_TIPS = {
    "transactional": [
        "가격 비교표를 포함하세요 (경쟁사 가격 대비)",
        "실제 비용 사례를 3개 이상 제시하세요",
        "결제 방법 및 할인 정보를 명확히 안내하세요",
        "CTA(행동 유도) 버튼을 여러 위치에 배치하세요",
    ],
    "commercial": [
        "네이버 블로그 스타일의 솔직한 후기 톤을 사용하세요",
        "장단점을 균형 있게 비교하세요",
        "실제 사용 사진 또는 전후 비교 이미지를 포함하세요",
        "별점 또는 점수 평가 체계를 추가하세요",
        "FAQ 섹션을 포함하세요 (네이버 검색 노출에 유리)",
    ],
    "informational": [
        "핵심 정보를 글 상단에 요약하세요 (두괄식 구성)",
        "전문 용어는 쉬운 설명을 병기하세요",
        "인포그래픽 또는 도표를 활용하세요",
        "관련 콘텐츠 내부 링크를 3-5개 포함하세요",
        "전문가 인용 또는 출처를 명시하세요 (E-E-A-T 강화)",
    ],
    "navigational": [
        "공식 정보와 연락처를 최상단에 배치하세요",
        "지도 임베드를 포함하세요 (네이버 지도/구글 맵)",
        "영업시간, 주소, 전화번호를 명확히 표시하세요",
    ],
}


# ---------------------------------------------------------------------------
# ContentBriefGenerator
# ---------------------------------------------------------------------------

class ContentBriefGenerator(BaseAsyncClient):
    """Generate comprehensive SEO content briefs."""

    def __init__(self, max_concurrent: int = 5, requests_per_second: float = 2.0):
        super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
        # Created lazily by _ensure_session() so construction does no I/O.
        self.session: aiohttp.ClientSession | None = None

    async def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the shared aiohttp session, creating it if missing or closed."""
        if self.session is None or self.session.closed:
            timeout = aiohttp.ClientTimeout(total=30)
            headers = {
                "User-Agent": "Mozilla/5.0 (compatible; SEOContentBrief/1.0)",
            }
            self.session = aiohttp.ClientSession(timeout=timeout, headers=headers)
        return self.session

    async def close(self) -> None:
        """Close the aiohttp session if one is open."""
        if self.session and not self.session.closed:
            await self.session.close()

    # ------------------------------------------------------------------
    # Ahrefs helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _get_ahrefs_api_key() -> str | None:
        """Return the Ahrefs API key, or None when config has no ``get_required``.

        Any exception raised by ``config.get_required`` propagates to the
        caller, which handles it together with request failures.
        """
        if hasattr(config, "get_required"):
            return config.get_required("AHREFS_API_KEY")
        return None

    async def _ahrefs_get(
        self,
        api_key: str,
        endpoint: str,
        params: dict[str, Any],
    ) -> dict[str, Any] | None:
        """GET an Ahrefs v3 endpoint and return the parsed JSON body.

        The blocking ``requests`` call runs in a worker thread so it does not
        stall the event loop; this is what lets the lookups started by
        ``generate`` overlap.

        Returns:
            The decoded JSON payload, or None when the response status is not 200.
        """
        resp = await asyncio.to_thread(
            requests.get,
            f"{AHREFS_API_BASE}/{endpoint}",
            params=params,
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=30,
        )
        if resp.status_code != 200:
            return None
        return resp.json()

    # ------------------------------------------------------------------
    # Analyze top ranking results
    # ------------------------------------------------------------------

    async def analyze_top_results(
        self,
        keyword: str,
        site_url: str | None = None,
        num_competitors: int = 5,
    ) -> list[CompetitorPageAnalysis]:
        """
        Analyze top ranking pages for a keyword using Ahrefs SERP data.

        The ranking URLs come from the Ahrefs SERP overview; each page is then
        fetched and parsed for content metrics. There is no alternative SERP
        source: when the Ahrefs lookup fails or yields nothing, the result is
        an empty list.

        Args:
            keyword: Keyword whose SERP is analyzed.
            site_url: Not used by this method; kept for interface compatibility.
            num_competitors: Maximum number of ranking pages to analyze.

        Returns:
            One CompetitorPageAnalysis per SERP entry. Pages that could not be
            fetched keep only the URL and title reported by Ahrefs.
        """
        self.logger.info(f"Analyzing top results for: {keyword}")
        results: list[CompetitorPageAnalysis] = []

        # Look up the ranking pages via the Ahrefs SERP overview
        try:
            api_key = self._get_ahrefs_api_key()
            if api_key:
                data = await self._ahrefs_get(
                    api_key,
                    "serp-overview",
                    {"keyword": keyword, "select": "url,title,position,traffic"},
                )
                if data is not None:
                    serp_items = data.get("positions", data.get("items", []))[:num_competitors]
                    for item in serp_items:
                        results.append(
                            CompetitorPageAnalysis(
                                url=item.get("url", ""),
                                title=item.get("title", ""),
                            )
                        )
        except Exception as exc:
            # Best effort: a failed lookup degrades to an empty analysis.
            self.logger.warning(f"Ahrefs SERP lookup failed: {exc}")

        # Fetch and analyze each page
        session = await self._ensure_session()
        for analysis in results[:num_competitors]:
            if not analysis.url:
                continue
            try:
                async with session.get(analysis.url) as resp:
                    if resp.status != 200:
                        continue
                    html = await resp.text()
                    self._analyze_page_content(analysis, html)
            except Exception as exc:
                # One unreachable or unparsable page must not abort the rest.
                self.logger.debug(f"Failed to fetch {analysis.url}: {exc}")

        self.logger.info(f"Analyzed {len(results)} competitor pages")
        return results

    @staticmethod
    def _analyze_page_content(analysis: CompetitorPageAnalysis, html: str) -> None:
        """Parse HTML and store content metrics on *analysis* in place.

        Args:
            analysis: Record to update; its title is only set when empty.
            html: Raw HTML of the page.
        """
        soup = BeautifulSoup(html, "html.parser")

        # Title
        title_tag = soup.find("title")
        if title_tag and not analysis.title:
            analysis.title = title_tag.get_text(strip=True)

        # FAQ structured data lives in <script> tags, so it has to be detected
        # before scripts are stripped for the word count below.
        has_faq_schema = bool(
            soup.find("script", type="application/ld+json", string=re.compile(r"FAQPage"))
        )

        # Word count (visible text only)
        for tag in soup(["script", "style", "nav", "header", "footer"]):
            tag.decompose()
        visible_text = soup.get_text(separator=" ", strip=True)
        analysis.word_count = len(visible_text.split())

        # Headings, in document order so that each H3 follows the H2 it
        # belongs to (extract_outline relies on that ordering).
        headings: list[dict[str, str]] = []
        for tag in soup.find_all([f"h{level}" for level in range(1, 7)]):
            text = tag.get_text(strip=True)
            if text:
                headings.append({"level": tag.name.upper(), "text": text})
        analysis.headings = headings

        # Content features
        analysis.has_images = len(soup.find_all("img")) > 2
        analysis.has_video = bool(
            soup.find("video") or soup.find("iframe", src=re.compile(r"youtube|vimeo"))
        )
        has_faq_text = bool(
            soup.find(string=re.compile(r"FAQ|자주\s*묻는\s*질문|Q\s*&\s*A", re.IGNORECASE))
        )
        analysis.has_faq = has_faq_text or has_faq_schema
        analysis.has_table = bool(soup.find("table"))

        # Topics covered (from H2 headings)
        analysis.topics_covered = [
            h["text"] for h in headings if h["level"] == "H2"
        ][:15]

    # ------------------------------------------------------------------
    # Extract content outline
    # ------------------------------------------------------------------

    def extract_outline(
        self,
        keyword: str,
        top_results: list[CompetitorPageAnalysis],
    ) -> list[OutlineSection]:
        """
        Build recommended H2/H3 outline by aggregating competitor headings.

        H2 headings are ranked by how many times they occur across the given
        pages; the most frequent ones become outline sections and the H3s that
        follow them become talking points. An FAQ section is appended when at
        least two pages have FAQ content and no section already covers it.

        Args:
            keyword: Primary keyword, used in the generated FAQ talking point.
            top_results: Analyzed competitor pages.

        Returns:
            Up to MAX_OUTLINE_SECTIONS sections, plus the optional FAQ section.
        """
        # Collect H2 frequencies and the H3s that follow each H2
        h2_topics: dict[str, int] = {}
        h3_by_h2: dict[str, list[str]] = {}

        for result in top_results:
            current_h2 = ""
            for heading in result.headings:
                text = heading["text"].strip()
                if heading["level"] == "H2":
                    current_h2 = text
                    h2_topics[text] = h2_topics.get(text, 0) + 1
                elif heading["level"] == "H3" and current_h2:
                    h3_by_h2.setdefault(current_h2, []).append(text)

        # Sort H2s by frequency (most common topics first); the sort is stable,
        # so ties keep first-seen order.
        sorted_h2s = sorted(h2_topics.items(), key=lambda x: x[1], reverse=True)
        selected_h2s = sorted_h2s[:MAX_OUTLINE_SECTIONS]

        # Split the word budget across the sections that are actually emitted,
        # not across every distinct H2 that was seen.
        target_word_count = self.calculate_word_count(top_results)
        words_per_section = target_word_count // max(len(selected_h2s), MIN_SECTION_DIVISOR)

        outline: list[OutlineSection] = []
        for h2_text, _frequency in selected_h2s:
            # dict.fromkeys de-duplicates while preserving first-seen order
            unique_h3s = list(dict.fromkeys(h3_by_h2.get(h2_text, [])))
            outline.append(
                OutlineSection(
                    heading=h2_text,
                    level=2,
                    target_words=words_per_section,
                    talking_points=unique_h3s[:MAX_SUBTOPICS_PER_SECTION],
                )
            )

        # Ensure FAQ section if common
        faq_count = sum(1 for r in top_results if r.has_faq)
        already_has_faq = any("FAQ" in s.heading or "질문" in s.heading for s in outline)
        if faq_count >= 2 and not already_has_faq:
            outline.append(OutlineSection(
                heading="자주 묻는 질문 (FAQ)",
                level=2,
                target_words=300,
                talking_points=[
                    f"{keyword} 관련 자주 묻는 질문 5-7개",
                    "Schema markup (FAQPage) 적용 권장",
                ],
            ))

        return outline

    # ------------------------------------------------------------------
    # Keyword suggestions
    # ------------------------------------------------------------------

    async def suggest_keywords(self, primary_keyword: str) -> dict[str, list[str]]:
        """
        Generate primary, secondary, and LSI keyword suggestions.

        Secondary keywords come from Ahrefs matching terms and LSI keywords
        from Ahrefs related terms. A keyword appears at most once across both
        lists (compared case-insensitively) and the primary keyword is never
        repeated in either.

        Returns:
            Dict with "primary", "secondary" and "lsi" lists. Only "primary"
            is populated when the API key is missing or a lookup fails.
        """
        self.logger.info(f"Generating keyword suggestions for: {primary_keyword}")
        result: dict[str, list[str]] = {
            "primary": [primary_keyword],
            "secondary": [],
            "lsi": [],
        }

        try:
            api_key = self._get_ahrefs_api_key()
            if not api_key:
                self.logger.warning("AHREFS_API_KEY not set; returning basic keywords only")
                return result

            # Lower-cased keywords already placed in some list
            seen = {primary_keyword.lower()}

            def collect(payload: dict[str, Any] | None, bucket: str) -> None:
                """Append unseen keywords from an Ahrefs payload to result[bucket]."""
                if payload is None:
                    return
                for term in payload.get("keywords", payload.get("items", [])):
                    kw = term.get("keyword", "")
                    if kw and kw.lower() not in seen:
                        seen.add(kw.lower())
                        result[bucket].append(kw)

            # Matching terms
            matching = await self._ahrefs_get(
                api_key,
                "keywords-explorer/matching-terms",
                {"keyword": primary_keyword, "limit": 20, "select": "keyword,volume,difficulty"},
            )
            collect(matching, "secondary")

            # Related terms (LSI)
            related = await self._ahrefs_get(
                api_key,
                "keywords-explorer/related-terms",
                {"keyword": primary_keyword, "limit": 15, "select": "keyword,volume"},
            )
            collect(related, "lsi")

        except Exception as exc:
            # Best effort: keep whatever was collected before the failure.
            self.logger.warning(f"Keyword suggestion lookup failed: {exc}")

        return result

    # ------------------------------------------------------------------
    # Word count calculation
    # ------------------------------------------------------------------

    @staticmethod
    def calculate_word_count(top_results: list[CompetitorPageAnalysis]) -> int:
        """
        Calculate target word count based on top 5 ranking pages.

        Averages the word counts of the first five results that have more than
        100 words, rounds to the nearest 100 and clamps to 800-5000.

        Returns:
            The target word count, or 1500 when no page qualifies.
        """
        word_counts = [r.word_count for r in top_results[:5] if r.word_count > 100]

        if not word_counts:
            return 1500  # Default fallback

        avg = sum(word_counts) / len(word_counts)
        # Round to nearest 100
        target = round(avg / 100) * 100
        return max(800, min(5000, target))

    # ------------------------------------------------------------------
    # Internal linking suggestions
    # ------------------------------------------------------------------

    @staticmethod
    def _traffic_as_int(value: Any) -> int:
        """Convert a traffic value (int, float, numeric string, None) to int; 0 if invalid."""
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return 0

    async def suggest_internal_links(
        self,
        keyword: str,
        site_url: str,
    ) -> list[dict[str, str]]:
        """
        Find related existing pages on the site for internal linking.

        Uses Ahrefs organic keywords for the site and keeps pages whose ranking
        keyword shares at least one word with *keyword* (but is not identical
        to it). Each URL is suggested once.

        Args:
            keyword: Primary keyword of the planned content.
            site_url: Site to search; its host is used as the Ahrefs target.

        Returns:
            Up to 10 link suggestions, highest traffic first. Empty when the
            API key is missing, the request fails, or nothing is relevant.
        """
        self.logger.info(f"Finding internal link opportunities for {keyword} on {site_url}")
        links: list[dict[str, str]] = []
        # Bare hosts ("example.com") have no netloc once parsed; use them as given.
        target = urlparse(site_url).netloc or site_url

        try:
            api_key = self._get_ahrefs_api_key()
            if not api_key:
                return links

            data = await self._ahrefs_get(
                api_key,
                "site-explorer/organic-keywords",
                {
                    "target": target,
                    "limit": 50,
                    "select": "keyword,url,position,traffic",
                },
            )
            if data is None:
                return links

            keywords_data = data.get("keywords", data.get("items", []))

            # Find pages ranking for related keywords
            keyword_lower = keyword.lower()
            keyword_words = set(keyword_lower.split())
            seen_urls: set[str] = set()

            for item in keywords_data:
                kw = item.get("keyword", "").lower()
                url = item.get("url", "")

                if not url or url in seen_urls:
                    continue

                # Check keyword relevance
                overlap = keyword_words & set(kw.split())
                if overlap and kw != keyword_lower:
                    links.append({
                        "url": url,
                        "anchor_text": kw,
                        "relevance": f"{len(overlap)}/{len(keyword_words)} word overlap",
                        "current_traffic": str(item.get("traffic", 0)),
                    })
                    seen_urls.add(url)

            # Traffic is stored as text; convert defensively so a null or
            # fractional value cannot abort the sort.
            links.sort(
                key=lambda link: self._traffic_as_int(link.get("current_traffic")),
                reverse=True,
            )

        except Exception as exc:
            # Best effort: return whatever was collected before the failure.
            self.logger.warning(f"Internal link suggestion failed: {exc}")

        return links[:10]

    # ------------------------------------------------------------------
    # Search intent detection
    # ------------------------------------------------------------------

    @staticmethod
    def detect_search_intent(keyword: str) -> str:
        """Classify keyword search intent.

        Each intent scores one point per INTENT_PATTERNS entry found in the
        keyword. English patterns must start at a word boundary; Korean
        patterns match anywhere.

        Returns:
            The highest scoring intent; ties go to the intent listed first in
            INTENT_PATTERNS. "informational" when nothing matches.
        """
        keyword_lower = keyword.lower()
        scores: dict[str, int] = {}

        for intent, patterns in INTENT_PATTERNS.items():
            score = sum(
                1
                for p in patterns
                if re.search(_anchor_intent_pattern(p), keyword_lower, re.IGNORECASE)
            )
            if score > 0:
                scores[intent] = score

        if not scores:
            return "informational"

        return max(scores, key=scores.get)

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    async def generate(
        self,
        keyword: str,
        site_url: str,
        num_competitors: int = 5,
    ) -> ContentBrief:
        """
        Generate a comprehensive SEO content brief.

        Args:
            keyword: Primary target keyword.
            site_url: Target website URL.
            num_competitors: Number of competitor pages to analyze.

        Returns:
            ContentBrief with outline, keywords, and recommendations.
        """
        self.logger.info(f"Generating content brief for: {keyword}")

        # Detect search intent
        intent = self.detect_search_intent(keyword)

        # Run analyses in parallel
        top_results, keyword_data, internal_links = await asyncio.gather(
            self.analyze_top_results(keyword, site_url, num_competitors),
            self.suggest_keywords(keyword),
            self.suggest_internal_links(keyword, site_url),
        )

        # Calculate word count target with a +/- 20% range
        target_word_count = self.calculate_word_count(top_results)
        word_count_min = int(target_word_count * 0.8)
        word_count_max = int(target_word_count * 1.2)

        # Build outline
        outline = self.extract_outline(keyword, top_results)

        # Generate title suggestion
        suggested_title = self._generate_title(keyword, intent)

        # Generate meta description
        meta_description = self._generate_meta_description(keyword, intent)

        # Korean format recommendations
        korean_tips = KOREAN_FORMAT_TIPS.get(intent, KOREAN_FORMAT_TIPS["informational"])

        brief = ContentBrief(
            primary_keyword=keyword,
            secondary_keywords=keyword_data.get("secondary", [])[:10],
            lsi_keywords=keyword_data.get("lsi", [])[:10],
            target_word_count=target_word_count,
            word_count_range=(word_count_min, word_count_max),
            suggested_title=suggested_title,
            meta_description=meta_description,
            outline=outline,
            competitor_analysis=top_results,
            internal_links=internal_links,
            content_format=self._suggest_format(intent, top_results),
            korean_format_recommendations=korean_tips,
            search_intent=intent,
            timestamp=datetime.now().isoformat(),
        )

        self.logger.info(
            f"Brief generated: {len(outline)} sections, "
            f"{target_word_count} target words, "
            f"{len(keyword_data.get('secondary', []))} secondary keywords"
        )

        return brief

    @staticmethod
    def _generate_title(keyword: str, intent: str) -> str:
        """Generate a suggested title based on keyword and intent.

        The informational template carries the current year so the "latest"
        claim in the title does not go stale. Unknown intents use the
        informational template.
        """
        templates = {
            "informational": "{keyword} - 완벽 가이드 ({year}년 최신)",
            "commercial": "{keyword} 추천 TOP 10 비교 (전문가 리뷰)",
            "transactional": "{keyword} 가격 비교 및 구매 가이드",
            "navigational": "{keyword} - 공식 안내",
        }
        template = templates.get(intent, templates["informational"])
        # Templates without a {year} placeholder simply ignore the extra argument.
        return template.format(keyword=keyword, year=datetime.now().year)

    @staticmethod
    def _generate_meta_description(keyword: str, intent: str) -> str:
        """Generate a suggested meta description.

        Unknown intents use the informational description.
        """
        templates = {
            "informational": (
                f"{keyword}에 대해 알아야 할 모든 것을 정리했습니다. "
                "전문가가 알려주는 핵심 정보와 실용적인 가이드를 확인하세요."
            ),
            "commercial": (
                f"{keyword} 비교 분석! 장단점, 가격, 실제 후기를 "
                "한눈에 비교하고 최적의 선택을 하세요."
            ),
            "transactional": (
                f"{keyword} 최저가 비교 및 구매 방법을 안내합니다. "
                "합리적인 가격으로 구매하는 팁을 확인하세요."
            ),
            "navigational": (
                f"{keyword} 공식 정보 및 이용 안내. "
                "정확한 정보를 빠르게 확인하세요."
            ),
        }
        return templates.get(intent, templates["informational"])

    @staticmethod
    def _suggest_format(intent: str, results: list[CompetitorPageAnalysis]) -> str:
        """Suggest content format based on intent and competitor analysis.

        Commercial, informational and transactional intents map to a fixed
        format. Any other intent depends on the average competitor word count:
        above 3000 words gives "comprehensive_guide", otherwise "blog".
        """
        if intent == "commercial":
            return "listicle"
        if intent == "informational":
            return "guide"
        if intent == "transactional":
            return "landing"

        # Check competitor patterns
        avg_word_count = (
            sum(r.word_count for r in results) / len(results) if results else 0
        )
        if avg_word_count > 3000:
            return "comprehensive_guide"

        return "blog"


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def build_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for the brief generator."""
    parser = argparse.ArgumentParser(
        description="SEO Content Brief Generator",
    )
    parser.add_argument("--keyword", required=True, help="Primary target keyword")
    parser.add_argument("--url", required=True, help="Target website URL")
    parser.add_argument("--competitors", type=int, default=5,
                        help="Number of competitor pages to analyze (default: 5)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", help="Save output to file")
    return parser


def format_text_report(brief: ContentBrief) -> str:
    """Format content brief as human-readable text.

    Sections for secondary/LSI keywords, competitors, internal links and
    Korean format tips are emitted only when the brief has data for them.
    """
    lines: list[str] = []
    lines.append(f"## Content Brief: {brief.primary_keyword}")
    lines.append(f"**Date**: {brief.timestamp[:10]}")
    lines.append(f"**Search Intent**: {brief.search_intent}")
    lines.append(f"**Content Format**: {brief.content_format}")
    lines.append("")

    lines.append("### Target Metrics")
    lines.append(f"- Word count: {brief.target_word_count} ({brief.word_count_range[0]}-{brief.word_count_range[1]})")
    lines.append(f"- Suggested title: {brief.suggested_title}")
    lines.append(f"- Meta description: {brief.meta_description}")
    lines.append("")

    lines.append("### Keywords")
    lines.append(f"- **Primary**: {brief.primary_keyword}")
    if brief.secondary_keywords:
        lines.append(f"- **Secondary**: {', '.join(brief.secondary_keywords[:8])}")
    if brief.lsi_keywords:
        lines.append(f"- **LSI**: {', '.join(brief.lsi_keywords[:8])}")
    lines.append("")

    lines.append("### Content Outline")
    for section in brief.outline:
        prefix = "##" if section.level == 2 else "###"
        lines.append(f" {prefix} {section.heading} (~{section.target_words}w)")
        for point in section.talking_points:
            lines.append(f" - {point}")
    lines.append("")

    if brief.competitor_analysis:
        lines.append(f"### Competitor Analysis ({len(brief.competitor_analysis)} pages)")
        for comp in brief.competitor_analysis:
            lines.append(f" - **{comp.title or comp.url}**")
            lines.append(f" Word count: {comp.word_count} | Headings: {len(comp.headings)}")
            features = []
            if comp.has_images:
                features.append("images")
            if comp.has_video:
                features.append("video")
            if comp.has_faq:
                features.append("FAQ")
            if comp.has_table:
                features.append("table")
            if features:
                lines.append(f" Features: {', '.join(features)}")
        lines.append("")

    if brief.internal_links:
        lines.append(f"### Internal Linking Suggestions ({len(brief.internal_links)})")
        for link in brief.internal_links[:7]:
            lines.append(f" - [{link['anchor_text']}]({link['url']})")
        lines.append("")

    if brief.korean_format_recommendations:
        lines.append("### Korean Content Format Recommendations")
        for tip in brief.korean_format_recommendations:
            lines.append(f" - {tip}")

    return "\n".join(lines)


async def main() -> None:
    """CLI entry point: generate a brief and print it or save it to a file."""
    parser = build_parser()
    args = parser.parse_args()

    generator = ContentBriefGenerator()
    try:
        brief = await generator.generate(
            keyword=args.keyword,
            site_url=args.url,
            num_competitors=args.competitors,
        )

        if args.json:
            output = json.dumps(asdict(brief), ensure_ascii=False, indent=2, default=str)
        else:
            output = format_text_report(brief)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(output)
            logger.info(f"Output saved to {args.output}")
        else:
            print(output)
    finally:
        # Always release the HTTP session, even when generation fails.
        await generator.close()
        generator.print_stats()


if __name__ == "__main__":
    asyncio.run(main())