12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
903 lines
35 KiB
Python
903 lines
35 KiB
Python
"""
Entity Auditor
==============

Purpose: Audit entity SEO signals including PAA monitoring, FAQ schema tracking,
entity markup validation, and brand SERP analysis.

Python: 3.10+
"""

import argparse
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import re
|
|
import sys
|
|
from dataclasses import asdict, dataclass, field
|
|
from datetime import datetime
|
|
from typing import Any
|
|
from urllib.parse import quote, urljoin, urlparse
|
|
|
|
import aiohttp
|
|
from bs4 import BeautifulSoup
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
from base_client import BaseAsyncClient, ConfigManager, config
|
|
|
|
logger = logging.getLogger(__name__)
|
|
console = Console()
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data classes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@dataclass
|
|
class PaaQuestion:
|
|
"""A People Also Ask question found in SERP."""
|
|
question: str = ""
|
|
keyword: str = ""
|
|
position: int = 0
|
|
source_url: str | None = None
|
|
|
|
|
|
@dataclass
|
|
class FaqRichResult:
|
|
"""FAQ rich result tracking entry."""
|
|
url: str = ""
|
|
question_count: int = 0
|
|
appearing_in_serp: bool = False
|
|
questions: list[str] = field(default_factory=list)
|
|
schema_valid: bool = False
|
|
|
|
|
|
@dataclass
|
|
class EntitySchema:
|
|
"""Entity structured data found on a website."""
|
|
type: str = "" # Organization, Person, LocalBusiness, etc.
|
|
properties: dict[str, Any] = field(default_factory=dict)
|
|
same_as_links: list[str] = field(default_factory=list)
|
|
completeness: float = 0.0
|
|
issues: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
|
|
class BrandSerpResult:
|
|
"""What appears when searching for the brand name."""
|
|
query: str = ""
|
|
features: list[str] = field(default_factory=list)
|
|
paa_count: int = 0
|
|
faq_count: int = 0
|
|
knowledge_panel: bool = False
|
|
sitelinks: bool = False
|
|
social_profiles: list[str] = field(default_factory=list)
|
|
top_results: list[dict[str, str]] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
|
|
class EntityAuditResult:
|
|
"""Full entity SEO audit result."""
|
|
url: str = ""
|
|
entity_name: str = ""
|
|
paa_questions: list[PaaQuestion] = field(default_factory=list)
|
|
faq_rich_results: list[FaqRichResult] = field(default_factory=list)
|
|
entity_schemas: list[EntitySchema] = field(default_factory=list)
|
|
brand_serp: BrandSerpResult = field(default_factory=BrandSerpResult)
|
|
social_profile_status: dict[str, bool] = field(default_factory=dict)
|
|
overall_score: float = 0.0
|
|
recommendations: list[str] = field(default_factory=list)
|
|
timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
|
|
|
|
def to_dict(self) -> dict[str, Any]:
|
|
return asdict(self)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entity Auditor
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class EntityAuditor(BaseAsyncClient):
|
|
"""Audit entity SEO signals and rich result presence."""
|
|
|
|
GOOGLE_SEARCH_URL = "https://www.google.com/search"
|
|
|
|
HEADERS = {
|
|
"User-Agent": (
|
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
"Chrome/120.0.0.0 Safari/537.36"
|
|
),
|
|
"Accept-Language": "en-US,en;q=0.9",
|
|
}
|
|
|
|
PAA_KEYWORD_TEMPLATES = [
|
|
"{entity}",
|
|
"{entity} reviews",
|
|
"{entity} vs",
|
|
"what is {entity}",
|
|
"{entity} pricing",
|
|
"{entity} alternatives",
|
|
"is {entity} good",
|
|
"{entity} benefits",
|
|
"how to use {entity}",
|
|
"{entity} complaints",
|
|
]
|
|
|
|
EXPECTED_SCHEMA_PROPERTIES = {
|
|
"Organization": [
|
|
"name", "url", "logo", "description", "sameAs",
|
|
"contactPoint", "address", "foundingDate", "founder",
|
|
"numberOfEmployees", "email", "telephone",
|
|
],
|
|
"Person": [
|
|
"name", "url", "image", "description", "sameAs",
|
|
"jobTitle", "worksFor", "alumniOf", "birthDate",
|
|
],
|
|
"LocalBusiness": [
|
|
"name", "url", "image", "description", "sameAs",
|
|
"address", "telephone", "openingHours", "geo",
|
|
"priceRange", "aggregateRating",
|
|
],
|
|
}
|
|
|
|
def __init__(self, **kwargs):
|
|
super().__init__(**kwargs)
|
|
self.config = config
|
|
|
|
# ------------------------------------------------------------------
|
|
# PAA monitoring
|
|
# ------------------------------------------------------------------
|
|
|
|
async def monitor_paa(
|
|
self,
|
|
entity_name: str,
|
|
keywords: list[str] | None = None,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> list[PaaQuestion]:
|
|
"""Search brand keywords and extract People Also Ask questions."""
|
|
if keywords is None:
|
|
keywords = [t.format(entity=entity_name) for t in self.PAA_KEYWORD_TEMPLATES]
|
|
|
|
paa_questions: list[PaaQuestion] = []
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
for keyword in keywords:
|
|
params = {"q": keyword, "hl": "en", "gl": "us"}
|
|
try:
|
|
async with session.get(
|
|
self.GOOGLE_SEARCH_URL, params=params, headers=self.HEADERS,
|
|
timeout=aiohttp.ClientTimeout(total=20),
|
|
) as resp:
|
|
if resp.status != 200:
|
|
logger.warning("Search for '%s' returned status %d", keyword, resp.status)
|
|
continue
|
|
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
|
|
# PAA box selectors
|
|
paa_selectors = [
|
|
"div[data-sgrd] div[data-q]",
|
|
"div.related-question-pair",
|
|
"div[jsname] div[data-q]",
|
|
"div.wQiwMc",
|
|
]
|
|
|
|
position = 0
|
|
for selector in paa_selectors:
|
|
elements = soup.select(selector)
|
|
for el in elements:
|
|
question_text = el.get("data-q", "") or el.get_text(strip=True)
|
|
if question_text and len(question_text) > 5:
|
|
position += 1
|
|
paa_questions.append(PaaQuestion(
|
|
question=question_text,
|
|
keyword=keyword,
|
|
position=position,
|
|
))
|
|
|
|
# Fallback: regex for PAA-like questions
|
|
if not paa_questions:
|
|
text = soup.get_text(separator="\n")
|
|
q_patterns = re.findall(
|
|
r"((?:What|How|Why|When|Where|Who|Is|Can|Does|Do|Which)\s+[^?\n]{10,80}\??)",
|
|
text,
|
|
)
|
|
for i, q in enumerate(q_patterns[:8]):
|
|
paa_questions.append(PaaQuestion(
|
|
question=q.strip(),
|
|
keyword=keyword,
|
|
position=i + 1,
|
|
))
|
|
|
|
except Exception as exc:
|
|
logger.error("PAA search failed for '%s': %s", keyword, exc)
|
|
continue
|
|
|
|
# Rate limit between searches
|
|
await asyncio.sleep(1.5)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
# Deduplicate questions
|
|
seen = set()
|
|
unique = []
|
|
for q in paa_questions:
|
|
key = q.question.lower().strip()
|
|
if key not in seen:
|
|
seen.add(key)
|
|
unique.append(q)
|
|
|
|
logger.info("Found %d unique PAA questions for '%s'", len(unique), entity_name)
|
|
return unique
|
|
|
|
# ------------------------------------------------------------------
|
|
# FAQ rich result tracking
|
|
# ------------------------------------------------------------------
|
|
|
|
async def track_faq_rich_results(
|
|
self,
|
|
url: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> list[FaqRichResult]:
|
|
"""Check pages for FAQPage schema and SERP appearance."""
|
|
faq_results: list[FaqRichResult] = []
|
|
domain = urlparse(url).netloc
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
# Fetch the page and look for FAQ schema
|
|
async with session.get(
|
|
url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=20),
|
|
) as resp:
|
|
if resp.status != 200:
|
|
logger.warning("Page %s returned status %d", url, resp.status)
|
|
return faq_results
|
|
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
|
|
# Find JSON-LD scripts with FAQPage
|
|
scripts = soup.find_all("script", type="application/ld+json")
|
|
for script in scripts:
|
|
try:
|
|
data = json.loads(script.string or "{}")
|
|
items = data if isinstance(data, list) else [data]
|
|
|
|
for item in items:
|
|
schema_type = item.get("@type", "")
|
|
if schema_type == "FAQPage" or (
|
|
isinstance(schema_type, list) and "FAQPage" in schema_type
|
|
):
|
|
questions = item.get("mainEntity", [])
|
|
faq = FaqRichResult(
|
|
url=url,
|
|
question_count=len(questions),
|
|
questions=[
|
|
q.get("name", "") for q in questions if isinstance(q, dict)
|
|
],
|
|
schema_valid=True,
|
|
)
|
|
faq_results.append(faq)
|
|
|
|
# Check for nested @graph
|
|
graph = item.get("@graph", [])
|
|
for g_item in graph:
|
|
if g_item.get("@type") == "FAQPage":
|
|
questions = g_item.get("mainEntity", [])
|
|
faq = FaqRichResult(
|
|
url=url,
|
|
question_count=len(questions),
|
|
questions=[
|
|
q.get("name", "") for q in questions if isinstance(q, dict)
|
|
],
|
|
schema_valid=True,
|
|
)
|
|
faq_results.append(faq)
|
|
|
|
except json.JSONDecodeError:
|
|
continue
|
|
|
|
# Also check for microdata FAQ markup
|
|
faq_items = soup.select("[itemtype*='FAQPage'] [itemprop='mainEntity']")
|
|
if faq_items and not faq_results:
|
|
questions = []
|
|
for item in faq_items:
|
|
q_el = item.select_one("[itemprop='name']")
|
|
if q_el:
|
|
questions.append(q_el.get_text(strip=True))
|
|
faq_results.append(FaqRichResult(
|
|
url=url,
|
|
question_count=len(questions),
|
|
questions=questions,
|
|
schema_valid=True,
|
|
))
|
|
|
|
except Exception as exc:
|
|
logger.error("FAQ tracking failed for %s: %s", url, exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
logger.info("Found %d FAQ schemas on %s", len(faq_results), url)
|
|
return faq_results
|
|
|
|
# ------------------------------------------------------------------
|
|
# Entity schema audit
|
|
# ------------------------------------------------------------------
|
|
|
|
async def audit_entity_schema(
|
|
self,
|
|
url: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> list[EntitySchema]:
|
|
"""Check Organization/Person/LocalBusiness schema on website."""
|
|
schemas: list[EntitySchema] = []
|
|
target_types = {"Organization", "Person", "LocalBusiness", "Corporation", "MedicalBusiness"}
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
async with session.get(
|
|
url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=20),
|
|
) as resp:
|
|
if resp.status != 200:
|
|
logger.warning("Page %s returned status %d", url, resp.status)
|
|
return schemas
|
|
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
|
|
scripts = soup.find_all("script", type="application/ld+json")
|
|
for script in scripts:
|
|
try:
|
|
data = json.loads(script.string or "{}")
|
|
items = data if isinstance(data, list) else [data]
|
|
|
|
# Include @graph nested items
|
|
expanded = []
|
|
for item in items:
|
|
expanded.append(item)
|
|
if "@graph" in item:
|
|
expanded.extend(item["@graph"])
|
|
|
|
for item in expanded:
|
|
item_type = item.get("@type", "")
|
|
if isinstance(item_type, list):
|
|
matching = [t for t in item_type if t in target_types]
|
|
if not matching:
|
|
continue
|
|
item_type = matching[0]
|
|
elif item_type not in target_types:
|
|
continue
|
|
|
|
same_as = item.get("sameAs", [])
|
|
if isinstance(same_as, str):
|
|
same_as = [same_as]
|
|
|
|
# Calculate completeness
|
|
base_type = item_type
|
|
if base_type == "Corporation":
|
|
base_type = "Organization"
|
|
elif base_type == "MedicalBusiness":
|
|
base_type = "LocalBusiness"
|
|
|
|
expected = self.EXPECTED_SCHEMA_PROPERTIES.get(base_type, [])
|
|
present = [k for k in expected if k in item and item[k]]
|
|
completeness = round((len(present) / len(expected)) * 100, 1) if expected else 0
|
|
|
|
# Check for issues
|
|
issues = []
|
|
if "name" not in item:
|
|
issues.append("Missing 'name' property")
|
|
if "url" not in item:
|
|
issues.append("Missing 'url' property")
|
|
if not same_as:
|
|
issues.append("No 'sameAs' links (social profiles)")
|
|
if "logo" not in item and base_type == "Organization":
|
|
issues.append("Missing 'logo' property")
|
|
if "description" not in item:
|
|
issues.append("Missing 'description' property")
|
|
|
|
schema = EntitySchema(
|
|
type=item_type,
|
|
properties={k: (str(v)[:100] if not isinstance(v, (list, dict)) else v) for k, v in item.items() if k != "@context"},
|
|
same_as_links=same_as,
|
|
completeness=completeness,
|
|
issues=issues,
|
|
)
|
|
schemas.append(schema)
|
|
|
|
except json.JSONDecodeError:
|
|
continue
|
|
|
|
except Exception as exc:
|
|
logger.error("Entity schema audit failed for %s: %s", url, exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
logger.info("Found %d entity schemas on %s", len(schemas), url)
|
|
return schemas
|
|
|
|
# ------------------------------------------------------------------
|
|
# Brand SERP analysis
|
|
# ------------------------------------------------------------------
|
|
|
|
async def analyze_brand_serp(
|
|
self,
|
|
entity_name: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> BrandSerpResult:
|
|
"""Analyze what appears in SERP for the brand name search."""
|
|
result = BrandSerpResult(query=entity_name)
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
params = {"q": entity_name, "hl": "en", "gl": "us"}
|
|
async with session.get(
|
|
self.GOOGLE_SEARCH_URL, params=params, headers=self.HEADERS,
|
|
timeout=aiohttp.ClientTimeout(total=20),
|
|
) as resp:
|
|
if resp.status != 200:
|
|
return result
|
|
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
text = soup.get_text(separator=" ", strip=True).lower()
|
|
|
|
# Detect SERP features
|
|
feature_indicators = {
|
|
"knowledge_panel": ["kp-wholepage", "knowledge-panel", "kno-"],
|
|
"sitelinks": ["sitelinks", "site-links"],
|
|
"people_also_ask": ["related-question-pair", "data-q"],
|
|
"faq_rich_result": ["faqpage", "frequently asked"],
|
|
"featured_snippet": ["featured-snippet", "data-tts"],
|
|
"image_pack": ["image-result", "img-brk"],
|
|
"video_carousel": ["video-result", "vid-"],
|
|
"twitter_carousel": ["twitter-timeline", "g-scrolling-carousel"],
|
|
"reviews": ["star-rating", "aggregate-rating"],
|
|
"local_pack": ["local-pack", "local_pack"],
|
|
}
|
|
|
|
for feature, indicators in feature_indicators.items():
|
|
for ind in indicators:
|
|
if ind in str(soup).lower():
|
|
result.features.append(feature)
|
|
break
|
|
|
|
result.knowledge_panel = "knowledge_panel" in result.features
|
|
result.sitelinks = "sitelinks" in result.features
|
|
|
|
# Count PAA questions
|
|
paa_elements = soup.select("div[data-q], div.related-question-pair")
|
|
result.paa_count = len(paa_elements)
|
|
if result.paa_count > 0 and "people_also_ask" not in result.features:
|
|
result.features.append("people_also_ask")
|
|
|
|
# Detect social profiles in results
|
|
social_domains = {
|
|
"twitter.com": "twitter", "x.com": "twitter",
|
|
"facebook.com": "facebook", "linkedin.com": "linkedin",
|
|
"youtube.com": "youtube", "instagram.com": "instagram",
|
|
"github.com": "github", "pinterest.com": "pinterest",
|
|
}
|
|
links = soup.find_all("a", href=True)
|
|
for link in links:
|
|
href = link["href"]
|
|
for domain, name in social_domains.items():
|
|
if domain in href and name not in result.social_profiles:
|
|
result.social_profiles.append(name)
|
|
|
|
# Extract top organic results
|
|
result_divs = soup.select("div.g, div[data-sokoban-container]")[:10]
|
|
for div in result_divs:
|
|
title_el = div.select_one("h3")
|
|
link_el = div.select_one("a[href]")
|
|
if title_el and link_el:
|
|
result.top_results.append({
|
|
"title": title_el.get_text(strip=True),
|
|
"url": link_el.get("href", ""),
|
|
})
|
|
|
|
except Exception as exc:
|
|
logger.error("Brand SERP analysis failed for '%s': %s", entity_name, exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
return result
|
|
|
|
# ------------------------------------------------------------------
|
|
# Social profile link validation
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_social_profile_links(
|
|
self,
|
|
same_as_links: list[str],
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> dict[str, bool]:
|
|
"""Validate sameAs URLs are accessible."""
|
|
status: dict[str, bool] = {}
|
|
if not same_as_links:
|
|
return status
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
for link in same_as_links:
|
|
try:
|
|
async with session.head(
|
|
link, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=10),
|
|
allow_redirects=True,
|
|
) as resp:
|
|
status[link] = resp.status < 400
|
|
except Exception:
|
|
status[link] = False
|
|
|
|
await asyncio.sleep(0.5)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
accessible = sum(1 for v in status.values() if v)
|
|
logger.info("Social profile links: %d/%d accessible", accessible, len(status))
|
|
return status
|
|
|
|
# ------------------------------------------------------------------
|
|
# Recommendations
|
|
# ------------------------------------------------------------------
|
|
|
|
def generate_recommendations(self, result: EntityAuditResult) -> list[str]:
|
|
"""Generate actionable entity SEO improvement recommendations."""
|
|
recs: list[str] = []
|
|
|
|
# PAA recommendations
|
|
if not result.paa_questions:
|
|
recs.append(
|
|
"브랜드 관련 People Also Ask(PAA) 질문이 감지되지 않았습니다. "
|
|
"FAQ 콘텐츠를 작성하여 PAA 노출 기회를 확보하세요."
|
|
)
|
|
elif len(result.paa_questions) < 5:
|
|
recs.append(
|
|
f"PAA 질문이 {len(result.paa_questions)}개만 감지되었습니다. "
|
|
"더 다양한 키워드에 대한 Q&A 콘텐츠를 강화하세요."
|
|
)
|
|
|
|
# FAQ schema recommendations
|
|
if not result.faq_rich_results:
|
|
recs.append(
|
|
"FAQPage schema가 감지되지 않았습니다. "
|
|
"FAQ 페이지에 FAQPage JSON-LD를 추가하여 Rich Result를 확보하세요."
|
|
)
|
|
else:
|
|
invalid = [f for f in result.faq_rich_results if not f.schema_valid]
|
|
if invalid:
|
|
recs.append(
|
|
f"{len(invalid)}개의 FAQ schema에 유효성 문제가 있습니다. "
|
|
"Google Rich Results Test로 검증하세요."
|
|
)
|
|
|
|
# Entity schema recommendations
|
|
if not result.entity_schemas:
|
|
recs.append(
|
|
"Organization/Person/LocalBusiness schema가 없습니다. "
|
|
"홈페이지에 Organization schema JSON-LD를 추가하세요."
|
|
)
|
|
else:
|
|
for schema in result.entity_schemas:
|
|
if schema.completeness < 50:
|
|
recs.append(
|
|
f"{schema.type} schema 완성도가 {schema.completeness}%입니다. "
|
|
f"누락 항목: {', '.join(schema.issues[:3])}"
|
|
)
|
|
if not schema.same_as_links:
|
|
recs.append(
|
|
f"{schema.type} schema에 sameAs 속성이 없습니다. "
|
|
"소셜 미디어 프로필 URL을 sameAs에 추가하세요."
|
|
)
|
|
|
|
# Brand SERP recommendations
|
|
serp = result.brand_serp
|
|
if not serp.knowledge_panel:
|
|
recs.append(
|
|
"브랜드 검색 시 Knowledge Panel이 표시되지 않습니다. "
|
|
"Wikipedia, Wikidata, 구조화된 데이터를 통해 엔티티 인식을 강화하세요."
|
|
)
|
|
if not serp.sitelinks:
|
|
recs.append(
|
|
"Sitelinks가 표시되지 않습니다. "
|
|
"사이트 구조와 내부 링크를 개선하세요."
|
|
)
|
|
if len(serp.social_profiles) < 3:
|
|
recs.append(
|
|
f"SERP에 소셜 프로필이 {len(serp.social_profiles)}개만 표시됩니다. "
|
|
"주요 소셜 미디어 프로필을 활성화하고 schema sameAs에 연결하세요."
|
|
)
|
|
|
|
# Social profile accessibility
|
|
broken = [url for url, ok in result.social_profile_status.items() if not ok]
|
|
if broken:
|
|
recs.append(
|
|
f"접근 불가한 소셜 프로필 링크 {len(broken)}개: "
|
|
f"{', '.join(broken[:3])}. sameAs URL을 업데이트하세요."
|
|
)
|
|
|
|
if not recs:
|
|
recs.append("Entity SEO 상태가 양호합니다. 현재 수준을 유지하세요.")
|
|
|
|
return recs
|
|
|
|
# ------------------------------------------------------------------
|
|
# Scoring
|
|
# ------------------------------------------------------------------
|
|
|
|
def compute_score(self, result: EntityAuditResult) -> float:
|
|
"""Compute overall entity SEO score (0-100)."""
|
|
score = 0.0
|
|
|
|
# PAA presence (15 points)
|
|
paa_count = len(result.paa_questions)
|
|
if paa_count >= 10:
|
|
score += 15
|
|
elif paa_count >= 5:
|
|
score += 10
|
|
elif paa_count > 0:
|
|
score += 5
|
|
|
|
# FAQ schema (15 points)
|
|
if result.faq_rich_results:
|
|
valid_count = sum(1 for f in result.faq_rich_results if f.schema_valid)
|
|
score += min(15, valid_count * 5)
|
|
|
|
# Entity schema (25 points)
|
|
if result.entity_schemas:
|
|
best_completeness = max(s.completeness for s in result.entity_schemas)
|
|
score += best_completeness * 0.25
|
|
|
|
# Brand SERP features (25 points)
|
|
serp = result.brand_serp
|
|
if serp.knowledge_panel:
|
|
score += 10
|
|
if serp.sitelinks:
|
|
score += 5
|
|
score += min(10, len(serp.features) * 2)
|
|
|
|
# Social profiles (10 points)
|
|
if result.social_profile_status:
|
|
accessible = sum(1 for v in result.social_profile_status.values() if v)
|
|
total = len(result.social_profile_status)
|
|
score += (accessible / total) * 10 if total > 0 else 0
|
|
|
|
# sameAs links (10 points)
|
|
total_same_as = sum(len(s.same_as_links) for s in result.entity_schemas)
|
|
score += min(10, total_same_as * 2)
|
|
|
|
return round(min(100, score), 1)
|
|
|
|
# ------------------------------------------------------------------
|
|
# Main orchestrator
|
|
# ------------------------------------------------------------------
|
|
|
|
async def audit(
|
|
self,
|
|
url: str,
|
|
entity_name: str,
|
|
include_paa: bool = True,
|
|
include_faq: bool = True,
|
|
) -> EntityAuditResult:
|
|
"""Orchestrate full entity SEO audit."""
|
|
result = EntityAuditResult(url=url, entity_name=entity_name)
|
|
logger.info("Starting entity audit for '%s' at %s", entity_name, url)
|
|
|
|
async with aiohttp.ClientSession() as session:
|
|
# Parallel tasks: entity schema, brand SERP, FAQ
|
|
tasks = [
|
|
self.audit_entity_schema(url, session),
|
|
self.analyze_brand_serp(entity_name, session),
|
|
]
|
|
|
|
if include_faq:
|
|
tasks.append(self.track_faq_rich_results(url, session))
|
|
|
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
|
|
# Unpack results
|
|
if not isinstance(results[0], Exception):
|
|
result.entity_schemas = results[0]
|
|
else:
|
|
logger.error("Entity schema audit failed: %s", results[0])
|
|
|
|
if not isinstance(results[1], Exception):
|
|
result.brand_serp = results[1]
|
|
else:
|
|
logger.error("Brand SERP analysis failed: %s", results[1])
|
|
|
|
if include_faq and len(results) > 2 and not isinstance(results[2], Exception):
|
|
result.faq_rich_results = results[2]
|
|
|
|
# PAA monitoring (sequential due to rate limits)
|
|
if include_paa:
|
|
result.paa_questions = await self.monitor_paa(entity_name, session=session)
|
|
|
|
# Validate social profile links from schema
|
|
all_same_as = []
|
|
for schema in result.entity_schemas:
|
|
all_same_as.extend(schema.same_as_links)
|
|
if all_same_as:
|
|
result.social_profile_status = await self.check_social_profile_links(
|
|
list(set(all_same_as)), session
|
|
)
|
|
|
|
# Compute score and recommendations
|
|
result.overall_score = self.compute_score(result)
|
|
result.recommendations = self.generate_recommendations(result)
|
|
|
|
logger.info("Entity audit complete. Score: %.1f", result.overall_score)
|
|
return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI display helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def display_result(result: EntityAuditResult) -> None:
|
|
"""Display audit result in rich tables."""
|
|
console.print()
|
|
console.print(f"[bold cyan]Entity SEO Audit: {result.entity_name}[/bold cyan]")
|
|
console.print(f"URL: {result.url} | Score: {result.overall_score}/100")
|
|
console.print()
|
|
|
|
# Entity Schema table
|
|
if result.entity_schemas:
|
|
table = Table(title="Entity Schema Markup", show_header=True)
|
|
table.add_column("Type", style="bold")
|
|
table.add_column("Completeness")
|
|
table.add_column("sameAs Links")
|
|
table.add_column("Issues")
|
|
|
|
for schema in result.entity_schemas:
|
|
issues_text = "; ".join(schema.issues[:3]) if schema.issues else "None"
|
|
table.add_row(
|
|
schema.type,
|
|
f"{schema.completeness}%",
|
|
str(len(schema.same_as_links)),
|
|
issues_text,
|
|
)
|
|
console.print(table)
|
|
else:
|
|
console.print("[red]No entity schema markup found on website![/red]")
|
|
console.print()
|
|
|
|
# Brand SERP table
|
|
serp = result.brand_serp
|
|
serp_table = Table(title="Brand SERP Analysis", show_header=True)
|
|
serp_table.add_column("Feature", style="bold")
|
|
serp_table.add_column("Status")
|
|
|
|
serp_table.add_row("Knowledge Panel", "[green]Yes[/]" if serp.knowledge_panel else "[red]No[/]")
|
|
serp_table.add_row("Sitelinks", "[green]Yes[/]" if serp.sitelinks else "[red]No[/]")
|
|
serp_table.add_row("PAA Count", str(serp.paa_count))
|
|
serp_table.add_row("SERP Features", ", ".join(serp.features) if serp.features else "None")
|
|
serp_table.add_row("Social Profiles", ", ".join(serp.social_profiles) if serp.social_profiles else "None")
|
|
|
|
console.print(serp_table)
|
|
console.print()
|
|
|
|
# PAA Questions
|
|
if result.paa_questions:
|
|
paa_table = Table(title=f"People Also Ask ({len(result.paa_questions)} questions)", show_header=True)
|
|
paa_table.add_column("#", style="dim")
|
|
paa_table.add_column("Question")
|
|
paa_table.add_column("Keyword")
|
|
|
|
for i, q in enumerate(result.paa_questions[:15], 1):
|
|
paa_table.add_row(str(i), q.question, q.keyword)
|
|
console.print(paa_table)
|
|
console.print()
|
|
|
|
# FAQ Rich Results
|
|
if result.faq_rich_results:
|
|
faq_table = Table(title="FAQ Rich Results", show_header=True)
|
|
faq_table.add_column("URL")
|
|
faq_table.add_column("Questions")
|
|
faq_table.add_column("Valid")
|
|
|
|
for faq in result.faq_rich_results:
|
|
faq_table.add_row(
|
|
faq.url[:60],
|
|
str(faq.question_count),
|
|
"[green]Yes[/]" if faq.schema_valid else "[red]No[/]",
|
|
)
|
|
console.print(faq_table)
|
|
console.print()
|
|
|
|
# Social Profile Status
|
|
if result.social_profile_status:
|
|
sp_table = Table(title="Social Profile Link Status", show_header=True)
|
|
sp_table.add_column("URL")
|
|
sp_table.add_column("Accessible")
|
|
|
|
for link, accessible in result.social_profile_status.items():
|
|
sp_table.add_row(
|
|
link[:70],
|
|
"[green]Yes[/]" if accessible else "[red]No[/]",
|
|
)
|
|
console.print(sp_table)
|
|
console.print()
|
|
|
|
# Recommendations
|
|
console.print("[bold yellow]Recommendations:[/bold yellow]")
|
|
for i, rec in enumerate(result.recommendations, 1):
|
|
console.print(f" {i}. {rec}")
|
|
console.print()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
|
|
parser = argparse.ArgumentParser(
|
|
description="Entity SEO Auditor",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
)
|
|
parser.add_argument("--url", required=True, help="Website URL to audit")
|
|
parser.add_argument("--entity", required=True, help="Entity/brand name")
|
|
parser.add_argument("--paa", action="store_true", default=True, help="Include PAA monitoring (default: True)")
|
|
parser.add_argument("--no-paa", action="store_true", help="Skip PAA monitoring")
|
|
parser.add_argument("--faq", action="store_true", default=True, help="Include FAQ tracking (default: True)")
|
|
parser.add_argument("--no-faq", action="store_true", help="Skip FAQ tracking")
|
|
parser.add_argument("--json", action="store_true", help="Output as JSON")
|
|
parser.add_argument("--output", type=str, help="Output file path")
|
|
return parser.parse_args()
|
|
|
|
|
|
async def main() -> None:
|
|
args = parse_args()
|
|
|
|
auditor = EntityAuditor()
|
|
result = await auditor.audit(
|
|
url=args.url,
|
|
entity_name=args.entity,
|
|
include_paa=not args.no_paa,
|
|
include_faq=not args.no_faq,
|
|
)
|
|
|
|
if args.json:
|
|
output = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
|
|
if args.output:
|
|
with open(args.output, "w", encoding="utf-8") as f:
|
|
f.write(output)
|
|
console.print(f"[green]Output saved to {args.output}[/green]")
|
|
else:
|
|
print(output)
|
|
else:
|
|
display_result(result)
|
|
if args.output:
|
|
with open(args.output, "w", encoding="utf-8") as f:
|
|
json.dump(result.to_dict(), f, ensure_ascii=False, indent=2)
|
|
console.print(f"[green]Output saved to {args.output}[/green]")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(main())
|