Files
Andrew Yim a3ff965b87 Add SEO skills 19-28, 31-32 with full Python implementations
12 new skills: Keyword Strategy, SERP Analysis, Position Tracking,
Link Building, Content Strategy, E-Commerce SEO, KPI Framework,
International SEO, AI Visibility, Knowledge Graph, Competitor Intel,
and Crawl Budget. ~20K lines of Python across 25 domain scripts.
Updated skill 11 pipeline table and repo CLAUDE.md.
Enhanced skill 18 local SEO workflow from jamie.clinic audit.

Note: Skill 26 hreflang_validator.py pending (content filter block).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 12:05:59 +09:00

903 lines
35 KiB
Python

"""
Entity Auditor
===============
Purpose: Audit entity SEO signals including PAA monitoring, FAQ schema tracking,
entity markup validation, and brand SERP analysis.
Python: 3.10+
"""
import argparse
import asyncio
import json
import logging
import re
import sys
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Any
from urllib.parse import quote, urljoin, urlparse
import aiohttp
from bs4 import BeautifulSoup
from rich.console import Console
from rich.table import Table
from base_client import BaseAsyncClient, ConfigManager, config
logger = logging.getLogger(__name__)
console = Console()
# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------
@dataclass
class PaaQuestion:
    """A People Also Ask question found in SERP."""
    question: str = ""  # question text exactly as extracted from the PAA box
    keyword: str = ""  # search keyword whose SERP surfaced this question
    position: int = 0  # 1-based extraction position within that SERP
    source_url: str | None = None  # answer source URL when known (not populated by monitor_paa)
@dataclass
class FaqRichResult:
    """FAQ rich result tracking entry for one page."""
    url: str = ""  # page URL the FAQ schema was found on
    question_count: int = 0  # number of mainEntity items in the schema
    appearing_in_serp: bool = False  # SERP appearance flag (not set by track_faq_rich_results)
    questions: list[str] = field(default_factory=list)  # question texts ("name" of each mainEntity)
    schema_valid: bool = False  # True when a parseable FAQPage schema was found
@dataclass
class EntitySchema:
    """Entity structured data found on a website."""
    type: str = ""  # schema.org @type: Organization, Person, LocalBusiness, etc.
    properties: dict[str, Any] = field(default_factory=dict)  # raw properties (scalar values truncated to 100 chars)
    same_as_links: list[str] = field(default_factory=list)  # sameAs URLs (social/reference profiles)
    completeness: float = 0.0  # percent of EXPECTED_SCHEMA_PROPERTIES present and non-empty
    issues: list[str] = field(default_factory=list)  # human-readable validation problems
@dataclass
class BrandSerpResult:
    """What appears when searching for the brand name."""
    query: str = ""  # the brand query that was searched
    features: list[str] = field(default_factory=list)  # detected SERP feature names (see analyze_brand_serp)
    paa_count: int = 0  # count of People Also Ask elements in the SERP
    faq_count: int = 0  # FAQ rich results counted (not set by analyze_brand_serp)
    knowledge_panel: bool = False  # True when a knowledge panel indicator matched
    sitelinks: bool = False  # True when a sitelinks indicator matched
    social_profiles: list[str] = field(default_factory=list)  # social networks linked from the SERP
    top_results: list[dict[str, str]] = field(default_factory=list)  # up to 10 organic results: {"title", "url"}
@dataclass
class EntityAuditResult:
    """Full entity SEO audit result aggregating all sub-audits."""
    url: str = ""  # audited website URL
    entity_name: str = ""  # brand/entity name being audited
    paa_questions: list[PaaQuestion] = field(default_factory=list)  # from monitor_paa
    faq_rich_results: list[FaqRichResult] = field(default_factory=list)  # from track_faq_rich_results
    entity_schemas: list[EntitySchema] = field(default_factory=list)  # from audit_entity_schema
    brand_serp: BrandSerpResult = field(default_factory=BrandSerpResult)  # from analyze_brand_serp
    social_profile_status: dict[str, bool] = field(default_factory=dict)  # sameAs URL -> reachable
    overall_score: float = 0.0  # 0-100 composite, see EntityAuditor.compute_score
    recommendations: list[str] = field(default_factory=list)  # actionable advice strings
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())  # audit creation time (local, naive)
    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict of the full result (recursive)."""
        return asdict(self)
# ---------------------------------------------------------------------------
# Entity Auditor
# ---------------------------------------------------------------------------
class EntityAuditor(BaseAsyncClient):
    """Audit entity SEO signals and rich result presence."""
    # Endpoint used for all SERP scraping requests.
    GOOGLE_SEARCH_URL = "https://www.google.com/search"
    # Browser-like headers to reduce the chance of bot detection.
    HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
    }
    # Query templates rendered with the entity name for PAA discovery.
    PAA_KEYWORD_TEMPLATES = [
        "{entity}",
        "{entity} reviews",
        "{entity} vs",
        "what is {entity}",
        "{entity} pricing",
        "{entity} alternatives",
        "is {entity} good",
        "{entity} benefits",
        "how to use {entity}",
        "{entity} complaints",
    ]
    # Properties expected per schema.org type; drives completeness scoring
    # in audit_entity_schema (subtypes are mapped onto these base types).
    EXPECTED_SCHEMA_PROPERTIES = {
        "Organization": [
            "name", "url", "logo", "description", "sameAs",
            "contactPoint", "address", "foundingDate", "founder",
            "numberOfEmployees", "email", "telephone",
        ],
        "Person": [
            "name", "url", "image", "description", "sameAs",
            "jobTitle", "worksFor", "alumniOf", "birthDate",
        ],
        "LocalBusiness": [
            "name", "url", "image", "description", "sameAs",
            "address", "telephone", "openingHours", "geo",
            "priceRange", "aggregateRating",
        ],
    }
    def __init__(self, **kwargs):
        """Forward kwargs to BaseAsyncClient and bind the shared config."""
        super().__init__(**kwargs)
        # Module-level `config` from base_client — presumably a ConfigManager
        # instance; confirm against base_client's definition.
        self.config = config
# ------------------------------------------------------------------
# PAA monitoring
# ------------------------------------------------------------------
async def monitor_paa(
self,
entity_name: str,
keywords: list[str] | None = None,
session: aiohttp.ClientSession | None = None,
) -> list[PaaQuestion]:
"""Search brand keywords and extract People Also Ask questions."""
if keywords is None:
keywords = [t.format(entity=entity_name) for t in self.PAA_KEYWORD_TEMPLATES]
paa_questions: list[PaaQuestion] = []
own_session = session is None
if own_session:
session = aiohttp.ClientSession()
try:
for keyword in keywords:
params = {"q": keyword, "hl": "en", "gl": "us"}
try:
async with session.get(
self.GOOGLE_SEARCH_URL, params=params, headers=self.HEADERS,
timeout=aiohttp.ClientTimeout(total=20),
) as resp:
if resp.status != 200:
logger.warning("Search for '%s' returned status %d", keyword, resp.status)
continue
html = await resp.text()
soup = BeautifulSoup(html, "lxml")
# PAA box selectors
paa_selectors = [
"div[data-sgrd] div[data-q]",
"div.related-question-pair",
"div[jsname] div[data-q]",
"div.wQiwMc",
]
position = 0
for selector in paa_selectors:
elements = soup.select(selector)
for el in elements:
question_text = el.get("data-q", "") or el.get_text(strip=True)
if question_text and len(question_text) > 5:
position += 1
paa_questions.append(PaaQuestion(
question=question_text,
keyword=keyword,
position=position,
))
# Fallback: regex for PAA-like questions
if not paa_questions:
text = soup.get_text(separator="\n")
q_patterns = re.findall(
r"((?:What|How|Why|When|Where|Who|Is|Can|Does|Do|Which)\s+[^?\n]{10,80}\??)",
text,
)
for i, q in enumerate(q_patterns[:8]):
paa_questions.append(PaaQuestion(
question=q.strip(),
keyword=keyword,
position=i + 1,
))
except Exception as exc:
logger.error("PAA search failed for '%s': %s", keyword, exc)
continue
# Rate limit between searches
await asyncio.sleep(1.5)
finally:
if own_session:
await session.close()
# Deduplicate questions
seen = set()
unique = []
for q in paa_questions:
key = q.question.lower().strip()
if key not in seen:
seen.add(key)
unique.append(q)
logger.info("Found %d unique PAA questions for '%s'", len(unique), entity_name)
return unique
# ------------------------------------------------------------------
# FAQ rich result tracking
# ------------------------------------------------------------------
async def track_faq_rich_results(
self,
url: str,
session: aiohttp.ClientSession | None = None,
) -> list[FaqRichResult]:
"""Check pages for FAQPage schema and SERP appearance."""
faq_results: list[FaqRichResult] = []
domain = urlparse(url).netloc
own_session = session is None
if own_session:
session = aiohttp.ClientSession()
try:
# Fetch the page and look for FAQ schema
async with session.get(
url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=20),
) as resp:
if resp.status != 200:
logger.warning("Page %s returned status %d", url, resp.status)
return faq_results
html = await resp.text()
soup = BeautifulSoup(html, "lxml")
# Find JSON-LD scripts with FAQPage
scripts = soup.find_all("script", type="application/ld+json")
for script in scripts:
try:
data = json.loads(script.string or "{}")
items = data if isinstance(data, list) else [data]
for item in items:
schema_type = item.get("@type", "")
if schema_type == "FAQPage" or (
isinstance(schema_type, list) and "FAQPage" in schema_type
):
questions = item.get("mainEntity", [])
faq = FaqRichResult(
url=url,
question_count=len(questions),
questions=[
q.get("name", "") for q in questions if isinstance(q, dict)
],
schema_valid=True,
)
faq_results.append(faq)
# Check for nested @graph
graph = item.get("@graph", [])
for g_item in graph:
if g_item.get("@type") == "FAQPage":
questions = g_item.get("mainEntity", [])
faq = FaqRichResult(
url=url,
question_count=len(questions),
questions=[
q.get("name", "") for q in questions if isinstance(q, dict)
],
schema_valid=True,
)
faq_results.append(faq)
except json.JSONDecodeError:
continue
# Also check for microdata FAQ markup
faq_items = soup.select("[itemtype*='FAQPage'] [itemprop='mainEntity']")
if faq_items and not faq_results:
questions = []
for item in faq_items:
q_el = item.select_one("[itemprop='name']")
if q_el:
questions.append(q_el.get_text(strip=True))
faq_results.append(FaqRichResult(
url=url,
question_count=len(questions),
questions=questions,
schema_valid=True,
))
except Exception as exc:
logger.error("FAQ tracking failed for %s: %s", url, exc)
finally:
if own_session:
await session.close()
logger.info("Found %d FAQ schemas on %s", len(faq_results), url)
return faq_results
# ------------------------------------------------------------------
# Entity schema audit
# ------------------------------------------------------------------
async def audit_entity_schema(
self,
url: str,
session: aiohttp.ClientSession | None = None,
) -> list[EntitySchema]:
"""Check Organization/Person/LocalBusiness schema on website."""
schemas: list[EntitySchema] = []
target_types = {"Organization", "Person", "LocalBusiness", "Corporation", "MedicalBusiness"}
own_session = session is None
if own_session:
session = aiohttp.ClientSession()
try:
async with session.get(
url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=20),
) as resp:
if resp.status != 200:
logger.warning("Page %s returned status %d", url, resp.status)
return schemas
html = await resp.text()
soup = BeautifulSoup(html, "lxml")
scripts = soup.find_all("script", type="application/ld+json")
for script in scripts:
try:
data = json.loads(script.string or "{}")
items = data if isinstance(data, list) else [data]
# Include @graph nested items
expanded = []
for item in items:
expanded.append(item)
if "@graph" in item:
expanded.extend(item["@graph"])
for item in expanded:
item_type = item.get("@type", "")
if isinstance(item_type, list):
matching = [t for t in item_type if t in target_types]
if not matching:
continue
item_type = matching[0]
elif item_type not in target_types:
continue
same_as = item.get("sameAs", [])
if isinstance(same_as, str):
same_as = [same_as]
# Calculate completeness
base_type = item_type
if base_type == "Corporation":
base_type = "Organization"
elif base_type == "MedicalBusiness":
base_type = "LocalBusiness"
expected = self.EXPECTED_SCHEMA_PROPERTIES.get(base_type, [])
present = [k for k in expected if k in item and item[k]]
completeness = round((len(present) / len(expected)) * 100, 1) if expected else 0
# Check for issues
issues = []
if "name" not in item:
issues.append("Missing 'name' property")
if "url" not in item:
issues.append("Missing 'url' property")
if not same_as:
issues.append("No 'sameAs' links (social profiles)")
if "logo" not in item and base_type == "Organization":
issues.append("Missing 'logo' property")
if "description" not in item:
issues.append("Missing 'description' property")
schema = EntitySchema(
type=item_type,
properties={k: (str(v)[:100] if not isinstance(v, (list, dict)) else v) for k, v in item.items() if k != "@context"},
same_as_links=same_as,
completeness=completeness,
issues=issues,
)
schemas.append(schema)
except json.JSONDecodeError:
continue
except Exception as exc:
logger.error("Entity schema audit failed for %s: %s", url, exc)
finally:
if own_session:
await session.close()
logger.info("Found %d entity schemas on %s", len(schemas), url)
return schemas
# ------------------------------------------------------------------
# Brand SERP analysis
# ------------------------------------------------------------------
async def analyze_brand_serp(
self,
entity_name: str,
session: aiohttp.ClientSession | None = None,
) -> BrandSerpResult:
"""Analyze what appears in SERP for the brand name search."""
result = BrandSerpResult(query=entity_name)
own_session = session is None
if own_session:
session = aiohttp.ClientSession()
try:
params = {"q": entity_name, "hl": "en", "gl": "us"}
async with session.get(
self.GOOGLE_SEARCH_URL, params=params, headers=self.HEADERS,
timeout=aiohttp.ClientTimeout(total=20),
) as resp:
if resp.status != 200:
return result
html = await resp.text()
soup = BeautifulSoup(html, "lxml")
text = soup.get_text(separator=" ", strip=True).lower()
# Detect SERP features
feature_indicators = {
"knowledge_panel": ["kp-wholepage", "knowledge-panel", "kno-"],
"sitelinks": ["sitelinks", "site-links"],
"people_also_ask": ["related-question-pair", "data-q"],
"faq_rich_result": ["faqpage", "frequently asked"],
"featured_snippet": ["featured-snippet", "data-tts"],
"image_pack": ["image-result", "img-brk"],
"video_carousel": ["video-result", "vid-"],
"twitter_carousel": ["twitter-timeline", "g-scrolling-carousel"],
"reviews": ["star-rating", "aggregate-rating"],
"local_pack": ["local-pack", "local_pack"],
}
for feature, indicators in feature_indicators.items():
for ind in indicators:
if ind in str(soup).lower():
result.features.append(feature)
break
result.knowledge_panel = "knowledge_panel" in result.features
result.sitelinks = "sitelinks" in result.features
# Count PAA questions
paa_elements = soup.select("div[data-q], div.related-question-pair")
result.paa_count = len(paa_elements)
if result.paa_count > 0 and "people_also_ask" not in result.features:
result.features.append("people_also_ask")
# Detect social profiles in results
social_domains = {
"twitter.com": "twitter", "x.com": "twitter",
"facebook.com": "facebook", "linkedin.com": "linkedin",
"youtube.com": "youtube", "instagram.com": "instagram",
"github.com": "github", "pinterest.com": "pinterest",
}
links = soup.find_all("a", href=True)
for link in links:
href = link["href"]
for domain, name in social_domains.items():
if domain in href and name not in result.social_profiles:
result.social_profiles.append(name)
# Extract top organic results
result_divs = soup.select("div.g, div[data-sokoban-container]")[:10]
for div in result_divs:
title_el = div.select_one("h3")
link_el = div.select_one("a[href]")
if title_el and link_el:
result.top_results.append({
"title": title_el.get_text(strip=True),
"url": link_el.get("href", ""),
})
except Exception as exc:
logger.error("Brand SERP analysis failed for '%s': %s", entity_name, exc)
finally:
if own_session:
await session.close()
return result
# ------------------------------------------------------------------
# Social profile link validation
# ------------------------------------------------------------------
async def check_social_profile_links(
self,
same_as_links: list[str],
session: aiohttp.ClientSession | None = None,
) -> dict[str, bool]:
"""Validate sameAs URLs are accessible."""
status: dict[str, bool] = {}
if not same_as_links:
return status
own_session = session is None
if own_session:
session = aiohttp.ClientSession()
try:
for link in same_as_links:
try:
async with session.head(
link, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=10),
allow_redirects=True,
) as resp:
status[link] = resp.status < 400
except Exception:
status[link] = False
await asyncio.sleep(0.5)
finally:
if own_session:
await session.close()
accessible = sum(1 for v in status.values() if v)
logger.info("Social profile links: %d/%d accessible", accessible, len(status))
return status
# ------------------------------------------------------------------
# Recommendations
# ------------------------------------------------------------------
def generate_recommendations(self, result: EntityAuditResult) -> list[str]:
"""Generate actionable entity SEO improvement recommendations."""
recs: list[str] = []
# PAA recommendations
if not result.paa_questions:
recs.append(
"브랜드 관련 People Also Ask(PAA) 질문이 감지되지 않았습니다. "
"FAQ 콘텐츠를 작성하여 PAA 노출 기회를 확보하세요."
)
elif len(result.paa_questions) < 5:
recs.append(
f"PAA 질문이 {len(result.paa_questions)}개만 감지되었습니다. "
"더 다양한 키워드에 대한 Q&A 콘텐츠를 강화하세요."
)
# FAQ schema recommendations
if not result.faq_rich_results:
recs.append(
"FAQPage schema가 감지되지 않았습니다. "
"FAQ 페이지에 FAQPage JSON-LD를 추가하여 Rich Result를 확보하세요."
)
else:
invalid = [f for f in result.faq_rich_results if not f.schema_valid]
if invalid:
recs.append(
f"{len(invalid)}개의 FAQ schema에 유효성 문제가 있습니다. "
"Google Rich Results Test로 검증하세요."
)
# Entity schema recommendations
if not result.entity_schemas:
recs.append(
"Organization/Person/LocalBusiness schema가 없습니다. "
"홈페이지에 Organization schema JSON-LD를 추가하세요."
)
else:
for schema in result.entity_schemas:
if schema.completeness < 50:
recs.append(
f"{schema.type} schema 완성도가 {schema.completeness}%입니다. "
f"누락 항목: {', '.join(schema.issues[:3])}"
)
if not schema.same_as_links:
recs.append(
f"{schema.type} schema에 sameAs 속성이 없습니다. "
"소셜 미디어 프로필 URL을 sameAs에 추가하세요."
)
# Brand SERP recommendations
serp = result.brand_serp
if not serp.knowledge_panel:
recs.append(
"브랜드 검색 시 Knowledge Panel이 표시되지 않습니다. "
"Wikipedia, Wikidata, 구조화된 데이터를 통해 엔티티 인식을 강화하세요."
)
if not serp.sitelinks:
recs.append(
"Sitelinks가 표시되지 않습니다. "
"사이트 구조와 내부 링크를 개선하세요."
)
if len(serp.social_profiles) < 3:
recs.append(
f"SERP에 소셜 프로필이 {len(serp.social_profiles)}개만 표시됩니다. "
"주요 소셜 미디어 프로필을 활성화하고 schema sameAs에 연결하세요."
)
# Social profile accessibility
broken = [url for url, ok in result.social_profile_status.items() if not ok]
if broken:
recs.append(
f"접근 불가한 소셜 프로필 링크 {len(broken)}개: "
f"{', '.join(broken[:3])}. sameAs URL을 업데이트하세요."
)
if not recs:
recs.append("Entity SEO 상태가 양호합니다. 현재 수준을 유지하세요.")
return recs
# ------------------------------------------------------------------
# Scoring
# ------------------------------------------------------------------
def compute_score(self, result: EntityAuditResult) -> float:
"""Compute overall entity SEO score (0-100)."""
score = 0.0
# PAA presence (15 points)
paa_count = len(result.paa_questions)
if paa_count >= 10:
score += 15
elif paa_count >= 5:
score += 10
elif paa_count > 0:
score += 5
# FAQ schema (15 points)
if result.faq_rich_results:
valid_count = sum(1 for f in result.faq_rich_results if f.schema_valid)
score += min(15, valid_count * 5)
# Entity schema (25 points)
if result.entity_schemas:
best_completeness = max(s.completeness for s in result.entity_schemas)
score += best_completeness * 0.25
# Brand SERP features (25 points)
serp = result.brand_serp
if serp.knowledge_panel:
score += 10
if serp.sitelinks:
score += 5
score += min(10, len(serp.features) * 2)
# Social profiles (10 points)
if result.social_profile_status:
accessible = sum(1 for v in result.social_profile_status.values() if v)
total = len(result.social_profile_status)
score += (accessible / total) * 10 if total > 0 else 0
# sameAs links (10 points)
total_same_as = sum(len(s.same_as_links) for s in result.entity_schemas)
score += min(10, total_same_as * 2)
return round(min(100, score), 1)
# ------------------------------------------------------------------
# Main orchestrator
# ------------------------------------------------------------------
async def audit(
self,
url: str,
entity_name: str,
include_paa: bool = True,
include_faq: bool = True,
) -> EntityAuditResult:
"""Orchestrate full entity SEO audit."""
result = EntityAuditResult(url=url, entity_name=entity_name)
logger.info("Starting entity audit for '%s' at %s", entity_name, url)
async with aiohttp.ClientSession() as session:
# Parallel tasks: entity schema, brand SERP, FAQ
tasks = [
self.audit_entity_schema(url, session),
self.analyze_brand_serp(entity_name, session),
]
if include_faq:
tasks.append(self.track_faq_rich_results(url, session))
results = await asyncio.gather(*tasks, return_exceptions=True)
# Unpack results
if not isinstance(results[0], Exception):
result.entity_schemas = results[0]
else:
logger.error("Entity schema audit failed: %s", results[0])
if not isinstance(results[1], Exception):
result.brand_serp = results[1]
else:
logger.error("Brand SERP analysis failed: %s", results[1])
if include_faq and len(results) > 2 and not isinstance(results[2], Exception):
result.faq_rich_results = results[2]
# PAA monitoring (sequential due to rate limits)
if include_paa:
result.paa_questions = await self.monitor_paa(entity_name, session=session)
# Validate social profile links from schema
all_same_as = []
for schema in result.entity_schemas:
all_same_as.extend(schema.same_as_links)
if all_same_as:
result.social_profile_status = await self.check_social_profile_links(
list(set(all_same_as)), session
)
# Compute score and recommendations
result.overall_score = self.compute_score(result)
result.recommendations = self.generate_recommendations(result)
logger.info("Entity audit complete. Score: %.1f", result.overall_score)
return result
# ---------------------------------------------------------------------------
# CLI display helpers
# ---------------------------------------------------------------------------
def display_result(result: EntityAuditResult) -> None:
    """Render the audit result to the console as rich tables."""

    def yes_no(flag: bool) -> str:
        # Shared green/red status marker used by several tables below.
        return "[green]Yes[/]" if flag else "[red]No[/]"

    console.print()
    console.print(f"[bold cyan]Entity SEO Audit: {result.entity_name}[/bold cyan]")
    console.print(f"URL: {result.url} | Score: {result.overall_score}/100")
    console.print()

    # Entity schema markup overview.
    if result.entity_schemas:
        schema_table = Table(title="Entity Schema Markup", show_header=True)
        schema_table.add_column("Type", style="bold")
        schema_table.add_column("Completeness")
        schema_table.add_column("sameAs Links")
        schema_table.add_column("Issues")
        for schema in result.entity_schemas:
            schema_table.add_row(
                schema.type,
                f"{schema.completeness}%",
                str(len(schema.same_as_links)),
                "; ".join(schema.issues[:3]) if schema.issues else "None",
            )
        console.print(schema_table)
    else:
        console.print("[red]No entity schema markup found on website![/red]")
    console.print()

    # Brand SERP summary.
    serp = result.brand_serp
    serp_table = Table(title="Brand SERP Analysis", show_header=True)
    serp_table.add_column("Feature", style="bold")
    serp_table.add_column("Status")
    serp_table.add_row("Knowledge Panel", yes_no(serp.knowledge_panel))
    serp_table.add_row("Sitelinks", yes_no(serp.sitelinks))
    serp_table.add_row("PAA Count", str(serp.paa_count))
    serp_table.add_row("SERP Features", ", ".join(serp.features) if serp.features else "None")
    serp_table.add_row("Social Profiles", ", ".join(serp.social_profiles) if serp.social_profiles else "None")
    console.print(serp_table)
    console.print()

    # People Also Ask questions (first 15).
    if result.paa_questions:
        paa_table = Table(title=f"People Also Ask ({len(result.paa_questions)} questions)", show_header=True)
        paa_table.add_column("#", style="dim")
        paa_table.add_column("Question")
        paa_table.add_column("Keyword")
        for idx, paa in enumerate(result.paa_questions[:15], 1):
            paa_table.add_row(str(idx), paa.question, paa.keyword)
        console.print(paa_table)
        console.print()

    # FAQ rich results.
    if result.faq_rich_results:
        faq_table = Table(title="FAQ Rich Results", show_header=True)
        faq_table.add_column("URL")
        faq_table.add_column("Questions")
        faq_table.add_column("Valid")
        for faq in result.faq_rich_results:
            faq_table.add_row(faq.url[:60], str(faq.question_count), yes_no(faq.schema_valid))
        console.print(faq_table)
        console.print()

    # Social profile reachability.
    if result.social_profile_status:
        sp_table = Table(title="Social Profile Link Status", show_header=True)
        sp_table.add_column("URL")
        sp_table.add_column("Accessible")
        for link, accessible in result.social_profile_status.items():
            sp_table.add_row(link[:70], yes_no(accessible))
        console.print(sp_table)
        console.print()

    # Actionable recommendations.
    console.print("[bold yellow]Recommendations:[/bold yellow]")
    for idx, rec in enumerate(result.recommendations, 1):
        console.print(f" {idx}. {rec}")
    console.print()
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for the entity SEO auditor."""
    parser = argparse.ArgumentParser(
        description="Entity SEO Auditor",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--url", required=True, help="Website URL to audit")
    parser.add_argument("--entity", required=True, help="Entity/brand name")
    # NOTE(review): --paa and --faq are no-ops — `store_true` with
    # default=True means the value is True whether or not the flag is given,
    # and main() only consults --no-paa / --no-faq. Kept for CLI backward
    # compatibility; consider argparse.BooleanOptionalAction instead.
    parser.add_argument("--paa", action="store_true", default=True, help="Include PAA monitoring (default: True)")
    parser.add_argument("--no-paa", action="store_true", help="Skip PAA monitoring")
    parser.add_argument("--faq", action="store_true", default=True, help="Include FAQ tracking (default: True)")
    parser.add_argument("--no-faq", action="store_true", help="Skip FAQ tracking")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", type=str, help="Output file path")
    return parser.parse_args()
def _save_report(path: str, payload: str) -> None:
    """Write the serialized audit JSON to *path* and confirm on console."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
    console.print(f"[green]Output saved to {path}[/green]")


async def main() -> None:
    """CLI entry point: run the audit and emit JSON or rich-table output."""
    args = parse_args()
    auditor = EntityAuditor()
    result = await auditor.audit(
        url=args.url,
        entity_name=args.entity,
        include_paa=not args.no_paa,
        include_faq=not args.no_faq,
    )
    # Serialize once; the file-writing path was previously duplicated
    # across the --json and table-display branches.
    payload = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
    if args.json:
        if args.output:
            _save_report(args.output, payload)
        else:
            print(payload)
    else:
        display_result(result)
        if args.output:
            _save_report(args.output, payload)


if __name__ == "__main__":
    asyncio.run(main())