Add SEO skills 19-28, 31-32 with full Python implementations
12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,902 @@
|
||||
"""
|
||||
Entity Auditor
|
||||
===============
|
||||
Purpose: Audit entity SEO signals including PAA monitoring, FAQ schema tracking,
|
||||
entity markup validation, and brand SERP analysis.
|
||||
Python: 3.10+
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from urllib.parse import quote, urljoin, urlparse
|
||||
|
||||
import aiohttp
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from base_client import BaseAsyncClient, ConfigManager, config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
console = Console()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data classes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class PaaQuestion:
    """A People Also Ask question found in SERP."""
    # The question text as scraped from the PAA box (or regex fallback).
    question: str = ""
    # The search query that surfaced this question.
    keyword: str = ""
    # 1-based position of the question within that keyword's PAA box.
    position: int = 0
    # URL of the page Google cites for the answer, when available.
    source_url: str | None = None
|
||||
|
||||
|
||||
@dataclass
class FaqRichResult:
    """FAQ rich result tracking entry."""
    # Page on which the FAQPage markup was found.
    url: str = ""
    # Number of mainEntity question nodes in the schema.
    question_count: int = 0
    # Whether the FAQ was observed rendered in the SERP (not set by the
    # on-page tracker itself).
    appearing_in_serp: bool = False
    # The question texts extracted from the markup.
    questions: list[str] = field(default_factory=list)
    # True when well-formed FAQPage markup was parsed.
    schema_valid: bool = False
|
||||
|
||||
|
||||
@dataclass
class EntitySchema:
    """Entity structured data found on a website."""
    type: str = ""  # Organization, Person, LocalBusiness, etc.
    # Truncated copy of the JSON-LD node's properties (minus @context).
    properties: dict[str, Any] = field(default_factory=dict)
    # URLs from the sameAs property (social/external profiles).
    same_as_links: list[str] = field(default_factory=list)
    # Percentage (0-100) of expected properties present for this type.
    completeness: float = 0.0
    # Human-readable problems detected in the markup.
    issues: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class BrandSerpResult:
    """What appears when searching for the brand name."""
    # The brand query that was searched.
    query: str = ""
    # Detected SERP feature identifiers (knowledge_panel, sitelinks, ...).
    features: list[str] = field(default_factory=list)
    # Number of People Also Ask elements counted in the SERP.
    paa_count: int = 0
    # Number of FAQ rich results counted (not populated by the analyzer).
    faq_count: int = 0
    # True when a knowledge panel indicator was found.
    knowledge_panel: bool = False
    # True when sitelinks indicators were found.
    sitelinks: bool = False
    # Social networks whose profiles appear in the brand SERP.
    social_profiles: list[str] = field(default_factory=list)
    # Up to 10 organic results as {"title": ..., "url": ...} dicts.
    top_results: list[dict[str, str]] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class EntityAuditResult:
    """Full entity SEO audit result."""
    # Audited website URL.
    url: str = ""
    # Brand/entity name the audit was run for.
    entity_name: str = ""
    # Unique PAA questions gathered across brand keyword searches.
    paa_questions: list[PaaQuestion] = field(default_factory=list)
    # FAQPage schemas found on the audited page.
    faq_rich_results: list[FaqRichResult] = field(default_factory=list)
    # Organization/Person/LocalBusiness schemas found on the page.
    entity_schemas: list[EntitySchema] = field(default_factory=list)
    # Analysis of the brand-name SERP.
    brand_serp: BrandSerpResult = field(default_factory=BrandSerpResult)
    # sameAs URL -> reachable? (HEAD status < 400).
    social_profile_status: dict[str, bool] = field(default_factory=dict)
    # Weighted 0-100 score computed by EntityAuditor.compute_score.
    overall_score: float = 0.0
    # Human-readable improvement advice (Korean).
    recommendations: list[str] = field(default_factory=list)
    # ISO-8601 creation time of the audit.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        # Recursive dataclass -> plain-dict conversion for JSON output.
        return asdict(self)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entity Auditor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class EntityAuditor(BaseAsyncClient):
    """Audit entity SEO signals and rich result presence.

    Scrapes Google SERPs (PAA boxes, brand SERP features) and fetches the
    audited site's structured data, then scores the combined signals.
    NOTE(review): SERP scraping depends on Google's current HTML markup
    and may break or be rate-limited — selectors below are best-effort.
    """

    # Endpoint used for all SERP scraping requests.
    GOOGLE_SEARCH_URL = "https://www.google.com/search"

    # Browser-like headers to reduce the chance of a blocked response.
    HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
    }

    # Query templates expanded with the entity name for PAA monitoring.
    PAA_KEYWORD_TEMPLATES = [
        "{entity}",
        "{entity} reviews",
        "{entity} vs",
        "what is {entity}",
        "{entity} pricing",
        "{entity} alternatives",
        "is {entity} good",
        "{entity} benefits",
        "how to use {entity}",
        "{entity} complaints",
    ]

    # Properties a complete schema of each base type is expected to carry;
    # used to compute EntitySchema.completeness.
    EXPECTED_SCHEMA_PROPERTIES = {
        "Organization": [
            "name", "url", "logo", "description", "sameAs",
            "contactPoint", "address", "foundingDate", "founder",
            "numberOfEmployees", "email", "telephone",
        ],
        "Person": [
            "name", "url", "image", "description", "sameAs",
            "jobTitle", "worksFor", "alumniOf", "birthDate",
        ],
        "LocalBusiness": [
            "name", "url", "image", "description", "sameAs",
            "address", "telephone", "openingHours", "geo",
            "priceRange", "aggregateRating",
        ],
    }

    def __init__(self, **kwargs):
        """Forward kwargs to BaseAsyncClient and bind the module config."""
        super().__init__(**kwargs)
        # Module-level ConfigManager instance shared with other tools.
        self.config = config
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# PAA monitoring
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def monitor_paa(
    self,
    entity_name: str,
    keywords: list[str] | None = None,
    session: aiohttp.ClientSession | None = None,
) -> list[PaaQuestion]:
    """Search brand keywords and extract People Also Ask questions.

    Args:
        entity_name: Brand/entity name used to expand the keyword templates.
        keywords: Explicit search queries; defaults to PAA_KEYWORD_TEMPLATES
            expanded with ``entity_name``.
        session: Optional shared aiohttp session; a temporary one is created
            and closed here when omitted.

    Returns:
        De-duplicated PaaQuestion entries across all keyword searches.
    """
    if keywords is None:
        keywords = [t.format(entity=entity_name) for t in self.PAA_KEYWORD_TEMPLATES]

    paa_questions: list[PaaQuestion] = []

    own_session = session is None
    if own_session:
        session = aiohttp.ClientSession()

    try:
        for keyword in keywords:
            params = {"q": keyword, "hl": "en", "gl": "us"}
            try:
                async with session.get(
                    self.GOOGLE_SEARCH_URL, params=params, headers=self.HEADERS,
                    timeout=aiohttp.ClientTimeout(total=20),
                ) as resp:
                    if resp.status != 200:
                        logger.warning("Search for '%s' returned status %d", keyword, resp.status)
                        continue

                    html = await resp.text()
                    soup = BeautifulSoup(html, "lxml")

                    # PAA box selectors — Google's markup changes often,
                    # so several known variants are tried.
                    paa_selectors = [
                        "div[data-sgrd] div[data-q]",
                        "div.related-question-pair",
                        "div[jsname] div[data-q]",
                        "div.wQiwMc",
                    ]

                    # BUGFIX: collect per-keyword so the regex fallback below
                    # can trigger for every keyword. The old code tested the
                    # global accumulator (`if not paa_questions`), so the
                    # fallback could only ever fire on the first keyword.
                    found_for_keyword: list[PaaQuestion] = []
                    position = 0
                    for selector in paa_selectors:
                        for el in soup.select(selector):
                            question_text = el.get("data-q", "") or el.get_text(strip=True)
                            # Minimum length filters out icons/empty nodes.
                            if question_text and len(question_text) > 5:
                                position += 1
                                found_for_keyword.append(PaaQuestion(
                                    question=question_text,
                                    keyword=keyword,
                                    position=position,
                                ))

                    # Fallback: regex for PAA-like question sentences when
                    # no selector matched anything for this keyword.
                    if not found_for_keyword:
                        text = soup.get_text(separator="\n")
                        q_patterns = re.findall(
                            r"((?:What|How|Why|When|Where|Who|Is|Can|Does|Do|Which)\s+[^?\n]{10,80}\??)",
                            text,
                        )
                        for i, q in enumerate(q_patterns[:8]):
                            found_for_keyword.append(PaaQuestion(
                                question=q.strip(),
                                keyword=keyword,
                                position=i + 1,
                            ))

                    paa_questions.extend(found_for_keyword)

            except Exception as exc:
                logger.error("PAA search failed for '%s': %s", keyword, exc)
            finally:
                # BUGFIX: rate-limit between searches on every path; the old
                # `continue` statements skipped the sleep on failures, which
                # hammered Google faster exactly when it was rejecting us.
                await asyncio.sleep(1.5)
    finally:
        if own_session:
            await session.close()

    # Deduplicate by normalized question text, keeping first occurrence.
    seen: set[str] = set()
    unique: list[PaaQuestion] = []
    for q in paa_questions:
        key = q.question.lower().strip()
        if key not in seen:
            seen.add(key)
            unique.append(q)

    logger.info("Found %d unique PAA questions for '%s'", len(unique), entity_name)
    return unique
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# FAQ rich result tracking
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def track_faq_rich_results(
    self,
    url: str,
    session: aiohttp.ClientSession | None = None,
) -> list[FaqRichResult]:
    """Check a page for FAQPage schema (JSON-LD and microdata).

    Args:
        url: Page to fetch and inspect.
        session: Optional shared aiohttp session; created/closed locally
            when omitted.

    Returns:
        One FaqRichResult per FAQPage node found (empty on fetch failure).
    """
    faq_results: list[FaqRichResult] = []

    def _is_faq_type(schema_type: Any) -> bool:
        # @type may be a plain string or a list of type names.
        if isinstance(schema_type, list):
            return "FAQPage" in schema_type
        return schema_type == "FAQPage"

    def _build_faq(node: dict) -> FaqRichResult:
        # Extract questions from mainEntity; ignore malformed entries.
        questions = node.get("mainEntity", [])
        return FaqRichResult(
            url=url,
            question_count=len(questions),
            questions=[
                q.get("name", "") for q in questions if isinstance(q, dict)
            ],
            schema_valid=True,
        )

    own_session = session is None
    if own_session:
        session = aiohttp.ClientSession()

    try:
        # Fetch the page and look for FAQ schema.
        async with session.get(
            url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=20),
        ) as resp:
            if resp.status != 200:
                logger.warning("Page %s returned status %d", url, resp.status)
                return faq_results

            html = await resp.text()
            soup = BeautifulSoup(html, "lxml")

            # Find JSON-LD scripts with FAQPage (top-level or inside @graph).
            scripts = soup.find_all("script", type="application/ld+json")
            for script in scripts:
                try:
                    data = json.loads(script.string or "{}")
                    items = data if isinstance(data, list) else [data]

                    for item in items:
                        # BUGFIX: a JSON-LD array may contain non-dict
                        # entries; the old code raised AttributeError and
                        # aborted the whole scan.
                        if not isinstance(item, dict):
                            continue
                        if _is_faq_type(item.get("@type", "")):
                            faq_results.append(_build_faq(item))

                        # Check nested @graph containers as well.
                        graph = item.get("@graph", [])
                        if isinstance(graph, list):
                            for g_item in graph:
                                # BUGFIX: also accept list-valued @type on
                                # @graph nodes (old code only matched the
                                # plain-string form).
                                if isinstance(g_item, dict) and _is_faq_type(g_item.get("@type")):
                                    faq_results.append(_build_faq(g_item))

                except json.JSONDecodeError:
                    continue

            # Also check for microdata FAQ markup (only when no JSON-LD hit).
            faq_items = soup.select("[itemtype*='FAQPage'] [itemprop='mainEntity']")
            if faq_items and not faq_results:
                questions = []
                for item in faq_items:
                    q_el = item.select_one("[itemprop='name']")
                    if q_el:
                        questions.append(q_el.get_text(strip=True))
                faq_results.append(FaqRichResult(
                    url=url,
                    question_count=len(questions),
                    questions=questions,
                    schema_valid=True,
                ))

    except Exception as exc:
        logger.error("FAQ tracking failed for %s: %s", url, exc)
    finally:
        if own_session:
            await session.close()

    logger.info("Found %d FAQ schemas on %s", len(faq_results), url)
    return faq_results
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Entity schema audit
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def audit_entity_schema(
    self,
    url: str,
    session: aiohttp.ClientSession | None = None,
) -> list[EntitySchema]:
    """Check Organization/Person/LocalBusiness schema on a website.

    Fetches ``url``, walks every JSON-LD script (including @graph nodes),
    and scores each entity-type node against EXPECTED_SCHEMA_PROPERTIES.

    Args:
        url: Page to fetch (typically the homepage).
        session: Optional shared aiohttp session; created/closed locally
            when omitted.

    Returns:
        One EntitySchema per matching JSON-LD node (empty on fetch failure).
    """
    schemas: list[EntitySchema] = []
    target_types = {"Organization", "Person", "LocalBusiness", "Corporation", "MedicalBusiness"}

    own_session = session is None
    if own_session:
        session = aiohttp.ClientSession()

    try:
        async with session.get(
            url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=20),
        ) as resp:
            if resp.status != 200:
                logger.warning("Page %s returned status %d", url, resp.status)
                return schemas

            html = await resp.text()
            soup = BeautifulSoup(html, "lxml")

            scripts = soup.find_all("script", type="application/ld+json")
            for script in scripts:
                try:
                    data = json.loads(script.string or "{}")
                    items = data if isinstance(data, list) else [data]

                    # Flatten @graph containers into the candidate list.
                    expanded = []
                    for item in items:
                        # BUGFIX: skip non-dict nodes (e.g. a JSON-LD array
                        # of strings). The old code raised AttributeError,
                        # which escaped to the outer handler and aborted the
                        # scan of every remaining script.
                        if not isinstance(item, dict):
                            continue
                        expanded.append(item)
                        graph = item.get("@graph", [])
                        if isinstance(graph, list):
                            expanded.extend(g for g in graph if isinstance(g, dict))

                    for item in expanded:
                        item_type = item.get("@type", "")
                        if isinstance(item_type, list):
                            matching = [t for t in item_type if t in target_types]
                            if not matching:
                                continue
                            item_type = matching[0]
                        elif item_type not in target_types:
                            continue

                        # sameAs may be a single URL or a list of URLs.
                        same_as = item.get("sameAs", [])
                        if isinstance(same_as, str):
                            same_as = [same_as]

                        # Map subtypes onto the base type whose property
                        # checklist we maintain.
                        base_type = item_type
                        if base_type == "Corporation":
                            base_type = "Organization"
                        elif base_type == "MedicalBusiness":
                            base_type = "LocalBusiness"

                        # Completeness = share of expected props present
                        # and non-empty. BUGFIX: fall back to 0.0, not int
                        # 0, so the field type stays float.
                        expected = self.EXPECTED_SCHEMA_PROPERTIES.get(base_type, [])
                        present = [k for k in expected if k in item and item[k]]
                        completeness = round((len(present) / len(expected)) * 100, 1) if expected else 0.0

                        # Collect the most impactful missing properties.
                        issues = []
                        if "name" not in item:
                            issues.append("Missing 'name' property")
                        if "url" not in item:
                            issues.append("Missing 'url' property")
                        if not same_as:
                            issues.append("No 'sameAs' links (social profiles)")
                        if "logo" not in item and base_type == "Organization":
                            issues.append("Missing 'logo' property")
                        if "description" not in item:
                            issues.append("Missing 'description' property")

                        schemas.append(EntitySchema(
                            type=item_type,
                            # Truncate scalar values so the report stays small.
                            properties={k: (str(v)[:100] if not isinstance(v, (list, dict)) else v) for k, v in item.items() if k != "@context"},
                            same_as_links=same_as,
                            completeness=completeness,
                            issues=issues,
                        ))

                except json.JSONDecodeError:
                    continue

    except Exception as exc:
        logger.error("Entity schema audit failed for %s: %s", url, exc)
    finally:
        if own_session:
            await session.close()

    logger.info("Found %d entity schemas on %s", len(schemas), url)
    return schemas
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Brand SERP analysis
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def analyze_brand_serp(
    self,
    entity_name: str,
    session: aiohttp.ClientSession | None = None,
) -> BrandSerpResult:
    """Analyze what appears in the SERP for the brand name search.

    Args:
        entity_name: Brand query to search for.
        session: Optional shared aiohttp session; created/closed locally
            when omitted.

    Returns:
        BrandSerpResult with detected features, social profiles, and up
        to 10 organic results. Partially filled on errors.
    """
    result = BrandSerpResult(query=entity_name)

    own_session = session is None
    if own_session:
        session = aiohttp.ClientSession()

    try:
        params = {"q": entity_name, "hl": "en", "gl": "us"}
        async with session.get(
            self.GOOGLE_SEARCH_URL, params=params, headers=self.HEADERS,
            timeout=aiohttp.ClientTimeout(total=20),
        ) as resp:
            if resp.status != 200:
                return result

            html = await resp.text()
            soup = BeautifulSoup(html, "lxml")
            # PERF FIX: serialize the DOM once. The old code called
            # str(soup).lower() inside the indicator loop, re-serializing
            # the full document for every indicator of every feature.
            # (Also dropped an unused `text = soup.get_text(...)` local.)
            page_markup = str(soup).lower()

            # Markup fragments that indicate each SERP feature.
            # NOTE(review): indicator strings track Google's class names
            # and change over time — treat hits as heuristic.
            feature_indicators = {
                "knowledge_panel": ["kp-wholepage", "knowledge-panel", "kno-"],
                "sitelinks": ["sitelinks", "site-links"],
                "people_also_ask": ["related-question-pair", "data-q"],
                "faq_rich_result": ["faqpage", "frequently asked"],
                "featured_snippet": ["featured-snippet", "data-tts"],
                "image_pack": ["image-result", "img-brk"],
                "video_carousel": ["video-result", "vid-"],
                "twitter_carousel": ["twitter-timeline", "g-scrolling-carousel"],
                "reviews": ["star-rating", "aggregate-rating"],
                "local_pack": ["local-pack", "local_pack"],
            }

            for feature, indicators in feature_indicators.items():
                if any(ind in page_markup for ind in indicators):
                    result.features.append(feature)

            result.knowledge_panel = "knowledge_panel" in result.features
            result.sitelinks = "sitelinks" in result.features

            # Count PAA questions.
            paa_elements = soup.select("div[data-q], div.related-question-pair")
            result.paa_count = len(paa_elements)
            if result.paa_count > 0 and "people_also_ask" not in result.features:
                result.features.append("people_also_ask")

            # Detect social profiles linked from the SERP.
            social_domains = {
                "twitter.com": "twitter", "x.com": "twitter",
                "facebook.com": "facebook", "linkedin.com": "linkedin",
                "youtube.com": "youtube", "instagram.com": "instagram",
                "github.com": "github", "pinterest.com": "pinterest",
            }
            for link in soup.find_all("a", href=True):
                href = link["href"]
                for domain, name in social_domains.items():
                    if domain in href and name not in result.social_profiles:
                        result.social_profiles.append(name)

            # Extract up to 10 organic results (title + URL).
            for div in soup.select("div.g, div[data-sokoban-container]")[:10]:
                title_el = div.select_one("h3")
                link_el = div.select_one("a[href]")
                if title_el and link_el:
                    result.top_results.append({
                        "title": title_el.get_text(strip=True),
                        "url": link_el.get("href", ""),
                    })

    except Exception as exc:
        logger.error("Brand SERP analysis failed for '%s': %s", entity_name, exc)
    finally:
        if own_session:
            await session.close()

    return result
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Social profile link validation
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def check_social_profile_links(
    self,
    same_as_links: list[str],
    session: aiohttp.ClientSession | None = None,
) -> dict[str, bool]:
    """Validate sameAs URLs are accessible.

    Issues a HEAD request (following redirects) for each URL and records
    whether the final status is below 400.

    Args:
        same_as_links: Profile URLs from schema sameAs properties.
        session: Optional shared aiohttp session; created/closed locally
            when omitted.

    Returns:
        Mapping of URL -> True when reachable, False otherwise.
    """
    reachability: dict[str, bool] = {}
    if not same_as_links:
        return reachability

    created_here = session is None
    if created_here:
        session = aiohttp.ClientSession()

    try:
        for profile_url in same_as_links:
            try:
                async with session.head(
                    profile_url,
                    headers=self.HEADERS,
                    timeout=aiohttp.ClientTimeout(total=10),
                    allow_redirects=True,
                ) as response:
                    reachability[profile_url] = response.status < 400
            except Exception:
                # DNS failure / timeout / refused all count as broken.
                reachability[profile_url] = False

            # Gentle pacing between HEAD requests.
            await asyncio.sleep(0.5)
    finally:
        if created_here:
            await session.close()

    ok_count = sum(1 for reachable in reachability.values() if reachable)
    logger.info("Social profile links: %d/%d accessible", ok_count, len(reachability))
    return reachability
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Recommendations
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def generate_recommendations(self, result: "EntityAuditResult") -> list[str]:
    """Generate actionable entity SEO improvement recommendations.

    Walks every audit dimension in a fixed priority order (PAA, FAQ
    schema, entity schema, brand SERP, social-profile reachability) and
    returns Korean-language advice strings; a single "all good" message
    is returned when nothing needs attention.
    """
    advice: list[str] = []

    # --- People Also Ask coverage ---
    paa_total = len(result.paa_questions)
    if paa_total == 0:
        advice.append(
            "브랜드 관련 People Also Ask(PAA) 질문이 감지되지 않았습니다. FAQ 콘텐츠를 작성하여 PAA 노출 기회를 확보하세요."
        )
    elif paa_total < 5:
        advice.append(
            f"PAA 질문이 {paa_total}개만 감지되었습니다. 더 다양한 키워드에 대한 Q&A 콘텐츠를 강화하세요."
        )

    # --- FAQ rich-result schema ---
    if not result.faq_rich_results:
        advice.append(
            "FAQPage schema가 감지되지 않았습니다. FAQ 페이지에 FAQPage JSON-LD를 추가하여 Rich Result를 확보하세요."
        )
    else:
        invalid_faqs = [f for f in result.faq_rich_results if not f.schema_valid]
        if invalid_faqs:
            advice.append(
                f"{len(invalid_faqs)}개의 FAQ schema에 유효성 문제가 있습니다. Google Rich Results Test로 검증하세요."
            )

    # --- Entity schema markup ---
    if not result.entity_schemas:
        advice.append(
            "Organization/Person/LocalBusiness schema가 없습니다. 홈페이지에 Organization schema JSON-LD를 추가하세요."
        )
    else:
        for entity_schema in result.entity_schemas:
            if entity_schema.completeness < 50:
                advice.append(
                    f"{entity_schema.type} schema 완성도가 {entity_schema.completeness}%입니다. 누락 항목: {', '.join(entity_schema.issues[:3])}"
                )
            if not entity_schema.same_as_links:
                advice.append(
                    f"{entity_schema.type} schema에 sameAs 속성이 없습니다. 소셜 미디어 프로필 URL을 sameAs에 추가하세요."
                )

    # --- Brand SERP features ---
    serp = result.brand_serp
    if not serp.knowledge_panel:
        advice.append(
            "브랜드 검색 시 Knowledge Panel이 표시되지 않습니다. Wikipedia, Wikidata, 구조화된 데이터를 통해 엔티티 인식을 강화하세요."
        )
    if not serp.sitelinks:
        advice.append(
            "Sitelinks가 표시되지 않습니다. 사이트 구조와 내부 링크를 개선하세요."
        )
    if len(serp.social_profiles) < 3:
        advice.append(
            f"SERP에 소셜 프로필이 {len(serp.social_profiles)}개만 표시됩니다. 주요 소셜 미디어 프로필을 활성화하고 schema sameAs에 연결하세요."
        )

    # --- Broken sameAs links ---
    broken_links = [u for u, ok in result.social_profile_status.items() if not ok]
    if broken_links:
        advice.append(
            f"접근 불가한 소셜 프로필 링크 {len(broken_links)}개: {', '.join(broken_links[:3])}. sameAs URL을 업데이트하세요."
        )

    # Nothing to fix: single keep-it-up message.
    if not advice:
        advice.append("Entity SEO 상태가 양호합니다. 현재 수준을 유지하세요.")

    return advice
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Scoring
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def compute_score(self, result: "EntityAuditResult") -> float:
    """Compute overall entity SEO score (0-100).

    Weights: PAA presence 15, valid FAQ schemas 15, best entity-schema
    completeness 25, brand SERP features 25, social-profile
    reachability 10, sameAs link count 10. Capped at 100.
    """
    total = 0.0

    # People Also Ask presence: tiered 5/10/15 points.
    paa_found = len(result.paa_questions)
    if paa_found >= 10:
        total += 15
    elif paa_found >= 5:
        total += 10
    elif paa_found > 0:
        total += 5

    # Valid FAQ schemas: 5 points each, capped at 15.
    if result.faq_rich_results:
        valid_faqs = sum(1 for faq in result.faq_rich_results if faq.schema_valid)
        total += min(15, valid_faqs * 5)

    # Best entity-schema completeness scaled into 25 points.
    if result.entity_schemas:
        total += max(s.completeness for s in result.entity_schemas) * 0.25

    # Brand SERP: knowledge panel 10, sitelinks 5, 2 per feature up to 10.
    serp = result.brand_serp
    total += 10 if serp.knowledge_panel else 0
    total += 5 if serp.sitelinks else 0
    total += min(10, len(serp.features) * 2)

    # Accessible social profiles: proportional share of 10 points.
    profile_status = result.social_profile_status
    if profile_status:
        reachable = sum(1 for ok in profile_status.values() if ok)
        total += (reachable / len(profile_status)) * 10

    # sameAs links across all schemas: 2 points each, capped at 10.
    same_as_total = sum(len(s.same_as_links) for s in result.entity_schemas)
    total += min(10, same_as_total * 2)

    return round(min(100, total), 1)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Main orchestrator
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def audit(
    self,
    url: str,
    entity_name: str,
    include_paa: bool = True,
    include_faq: bool = True,
) -> EntityAuditResult:
    """Orchestrate full entity SEO audit.

    Runs schema audit, brand SERP analysis, and (optionally) FAQ tracking
    concurrently, then PAA monitoring sequentially, then scores the
    combined result and attaches recommendations.

    Args:
        url: Website URL to audit.
        entity_name: Brand/entity name searched for in SERPs.
        include_paa: Run the (slow, rate-limited) PAA monitoring step.
        include_faq: Run FAQPage schema tracking on the page.

    Returns:
        A populated EntityAuditResult; individual sub-audit failures are
        logged and leave the corresponding fields empty.
    """
    result = EntityAuditResult(url=url, entity_name=entity_name)
    logger.info("Starting entity audit for '%s' at %s", entity_name, url)

    async with aiohttp.ClientSession() as session:
        # Parallel tasks: entity schema, brand SERP, FAQ.
        # NOTE: the unpacking below depends on this exact task order.
        tasks = [
            self.audit_entity_schema(url, session),
            self.analyze_brand_serp(entity_name, session),
        ]

        if include_faq:
            tasks.append(self.track_faq_rich_results(url, session))

        # return_exceptions=True so one failed sub-audit does not cancel
        # the others; failures are checked per-slot below.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Unpack results (index-matched to the task list above).
        if not isinstance(results[0], Exception):
            result.entity_schemas = results[0]
        else:
            logger.error("Entity schema audit failed: %s", results[0])

        if not isinstance(results[1], Exception):
            result.brand_serp = results[1]
        else:
            logger.error("Brand SERP analysis failed: %s", results[1])

        if include_faq and len(results) > 2 and not isinstance(results[2], Exception):
            result.faq_rich_results = results[2]

        # PAA monitoring (sequential due to rate limits).
        if include_paa:
            result.paa_questions = await self.monitor_paa(entity_name, session=session)

        # Validate social profile links collected from schema sameAs.
        all_same_as = []
        for schema in result.entity_schemas:
            all_same_as.extend(schema.same_as_links)
        if all_same_as:
            # set() removes duplicate URLs across multiple schemas.
            result.social_profile_status = await self.check_social_profile_links(
                list(set(all_same_as)), session
            )

    # Compute score and recommendations from the assembled signals.
    result.overall_score = self.compute_score(result)
    result.recommendations = self.generate_recommendations(result)

    logger.info("Entity audit complete. Score: %.1f", result.overall_score)
    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI display helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def display_result(result: EntityAuditResult) -> None:
    """Display audit result in rich tables.

    Prints (to the module-level rich console): a header with the score,
    then one table per non-empty section — entity schemas, brand SERP,
    PAA questions (first 15), FAQ rich results, social-profile status —
    and the recommendation list. Output only; returns nothing.
    """
    console.print()
    console.print(f"[bold cyan]Entity SEO Audit: {result.entity_name}[/bold cyan]")
    console.print(f"URL: {result.url} | Score: {result.overall_score}/100")
    console.print()

    # Entity Schema table
    if result.entity_schemas:
        table = Table(title="Entity Schema Markup", show_header=True)
        table.add_column("Type", style="bold")
        table.add_column("Completeness")
        table.add_column("sameAs Links")
        table.add_column("Issues")

        for schema in result.entity_schemas:
            # Show at most three issues to keep rows readable.
            issues_text = "; ".join(schema.issues[:3]) if schema.issues else "None"
            table.add_row(
                schema.type,
                f"{schema.completeness}%",
                str(len(schema.same_as_links)),
                issues_text,
            )
        console.print(table)
    else:
        console.print("[red]No entity schema markup found on website![/red]")
    console.print()

    # Brand SERP table
    serp = result.brand_serp
    serp_table = Table(title="Brand SERP Analysis", show_header=True)
    serp_table.add_column("Feature", style="bold")
    serp_table.add_column("Status")

    serp_table.add_row("Knowledge Panel", "[green]Yes[/]" if serp.knowledge_panel else "[red]No[/]")
    serp_table.add_row("Sitelinks", "[green]Yes[/]" if serp.sitelinks else "[red]No[/]")
    serp_table.add_row("PAA Count", str(serp.paa_count))
    serp_table.add_row("SERP Features", ", ".join(serp.features) if serp.features else "None")
    serp_table.add_row("Social Profiles", ", ".join(serp.social_profiles) if serp.social_profiles else "None")

    console.print(serp_table)
    console.print()

    # PAA Questions (first 15 only, to bound output size)
    if result.paa_questions:
        paa_table = Table(title=f"People Also Ask ({len(result.paa_questions)} questions)", show_header=True)
        paa_table.add_column("#", style="dim")
        paa_table.add_column("Question")
        paa_table.add_column("Keyword")

        for i, q in enumerate(result.paa_questions[:15], 1):
            paa_table.add_row(str(i), q.question, q.keyword)
        console.print(paa_table)
        console.print()

    # FAQ Rich Results
    if result.faq_rich_results:
        faq_table = Table(title="FAQ Rich Results", show_header=True)
        faq_table.add_column("URL")
        faq_table.add_column("Questions")
        faq_table.add_column("Valid")

        for faq in result.faq_rich_results:
            faq_table.add_row(
                faq.url[:60],
                str(faq.question_count),
                "[green]Yes[/]" if faq.schema_valid else "[red]No[/]",
            )
        console.print(faq_table)
        console.print()

    # Social Profile Status
    if result.social_profile_status:
        sp_table = Table(title="Social Profile Link Status", show_header=True)
        sp_table.add_column("URL")
        sp_table.add_column("Accessible")

        for link, accessible in result.social_profile_status.items():
            sp_table.add_row(
                link[:70],
                "[green]Yes[/]" if accessible else "[red]No[/]",
            )
        console.print(sp_table)
        console.print()

    # Recommendations
    console.print("[bold yellow]Recommendations:[/bold yellow]")
    for i, rec in enumerate(result.recommendations, 1):
        console.print(f"  {i}. {rec}")
    console.print()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse command-line options for the entity SEO auditor CLI.

    NOTE(review): `--paa` and `--faq` default to True and are not read by
    main(); only `--no-paa` / `--no-faq` actually change behavior. Kept
    as-is for CLI backward compatibility.
    """
    parser = argparse.ArgumentParser(
        description="Entity SEO Auditor",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = parser.add_argument
    add("--url", required=True, help="Website URL to audit")
    add("--entity", required=True, help="Entity/brand name")
    add("--paa", action="store_true", default=True, help="Include PAA monitoring (default: True)")
    add("--no-paa", action="store_true", help="Skip PAA monitoring")
    add("--faq", action="store_true", default=True, help="Include FAQ tracking (default: True)")
    add("--no-faq", action="store_true", help="Skip FAQ tracking")
    add("--json", action="store_true", help="Output as JSON")
    add("--output", type=str, help="Output file path")
    return parser.parse_args()
|
||||
|
||||
|
||||
async def main() -> None:
    """CLI entry point: run the audit and print or save the result.

    With --json the result is emitted as JSON (to --output if given,
    otherwise stdout); without it, rich tables are printed and --output
    additionally saves the JSON form.
    """
    args = parse_args()

    auditor = EntityAuditor()
    # --no-paa / --no-faq invert into the include_* flags.
    result = await auditor.audit(
        url=args.url,
        entity_name=args.entity,
        include_paa=not args.no_paa,
        include_faq=not args.no_faq,
    )

    if args.json:
        # ensure_ascii=False keeps the Korean recommendation text readable.
        output = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(output)
            console.print(f"[green]Output saved to {args.output}[/green]")
        else:
            print(output)
    else:
        display_result(result)
        if args.output:
            # Human-readable display plus a machine-readable JSON copy.
            with open(args.output, "w", encoding="utf-8") as f:
                json.dump(result.to_dict(), f, ensure_ascii=False, indent=2)
            console.print(f"[green]Output saved to {args.output}[/green]")
|
||||
|
||||
|
||||
# Script entry point: drive the async CLI under asyncio's event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user