12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
903 lines
35 KiB
Python
903 lines
35 KiB
Python
"""
Entity Auditor
==============

Purpose: Audit entity SEO signals including PAA monitoring, FAQ schema tracking,
entity markup validation, and brand SERP analysis.

Python: 3.10+
"""

import argparse
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import re
|
|
import sys
|
|
from dataclasses import asdict, dataclass, field
|
|
from datetime import datetime
|
|
from typing import Any
|
|
from urllib.parse import quote, urljoin, urlparse
|
|
|
|
import aiohttp
|
|
from bs4 import BeautifulSoup
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
from base_client import BaseAsyncClient, ConfigManager, config
|
|
|
|
logger = logging.getLogger(__name__)
|
|
console = Console()
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data classes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@dataclass
|
|
class PaaQuestion:
|
|
"""A People Also Ask question found in SERP."""
|
|
question: str = ""
|
|
keyword: str = ""
|
|
position: int = 0
|
|
source_url: str | None = None
|
|
|
|
|
|
@dataclass
|
|
class FaqRichResult:
|
|
"""FAQ rich result tracking entry."""
|
|
url: str = ""
|
|
question_count: int = 0
|
|
appearing_in_serp: bool = False
|
|
questions: list[str] = field(default_factory=list)
|
|
schema_valid: bool = False
|
|
|
|
|
|
@dataclass
|
|
class EntitySchema:
|
|
"""Entity structured data found on a website."""
|
|
type: str = "" # Organization, Person, LocalBusiness, etc.
|
|
properties: dict[str, Any] = field(default_factory=dict)
|
|
same_as_links: list[str] = field(default_factory=list)
|
|
completeness: float = 0.0
|
|
issues: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
|
|
class BrandSerpResult:
|
|
"""What appears when searching for the brand name."""
|
|
query: str = ""
|
|
features: list[str] = field(default_factory=list)
|
|
paa_count: int = 0
|
|
faq_count: int = 0
|
|
knowledge_panel: bool = False
|
|
sitelinks: bool = False
|
|
social_profiles: list[str] = field(default_factory=list)
|
|
top_results: list[dict[str, str]] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
|
|
class EntityAuditResult:
|
|
"""Full entity SEO audit result."""
|
|
url: str = ""
|
|
entity_name: str = ""
|
|
paa_questions: list[PaaQuestion] = field(default_factory=list)
|
|
faq_rich_results: list[FaqRichResult] = field(default_factory=list)
|
|
entity_schemas: list[EntitySchema] = field(default_factory=list)
|
|
brand_serp: BrandSerpResult = field(default_factory=BrandSerpResult)
|
|
social_profile_status: dict[str, bool] = field(default_factory=dict)
|
|
overall_score: float = 0.0
|
|
recommendations: list[str] = field(default_factory=list)
|
|
timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
|
|
|
|
def to_dict(self) -> dict[str, Any]:
|
|
return asdict(self)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entity Auditor
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class EntityAuditor(BaseAsyncClient):
|
|
"""Audit entity SEO signals and rich result presence."""
|
|
|
|
GOOGLE_SEARCH_URL = "https://www.google.com/search"
|
|
|
|
HEADERS = {
|
|
"User-Agent": (
|
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
"Chrome/120.0.0.0 Safari/537.36"
|
|
),
|
|
"Accept-Language": "en-US,en;q=0.9",
|
|
}
|
|
|
|
PAA_KEYWORD_TEMPLATES = [
|
|
"{entity}",
|
|
"{entity} reviews",
|
|
"{entity} vs",
|
|
"what is {entity}",
|
|
"{entity} pricing",
|
|
"{entity} alternatives",
|
|
"is {entity} good",
|
|
"{entity} benefits",
|
|
"how to use {entity}",
|
|
"{entity} complaints",
|
|
]
|
|
|
|
EXPECTED_SCHEMA_PROPERTIES = {
|
|
"Organization": [
|
|
"name", "url", "logo", "description", "sameAs",
|
|
"contactPoint", "address", "foundingDate", "founder",
|
|
"numberOfEmployees", "email", "telephone",
|
|
],
|
|
"Person": [
|
|
"name", "url", "image", "description", "sameAs",
|
|
"jobTitle", "worksFor", "alumniOf", "birthDate",
|
|
],
|
|
"LocalBusiness": [
|
|
"name", "url", "image", "description", "sameAs",
|
|
"address", "telephone", "openingHours", "geo",
|
|
"priceRange", "aggregateRating",
|
|
],
|
|
}
|
|
|
|
def __init__(self, **kwargs):
|
|
super().__init__(**kwargs)
|
|
self.config = config
|
|
|
|
# ------------------------------------------------------------------
|
|
# PAA monitoring
|
|
# ------------------------------------------------------------------
|
|
|
|
async def monitor_paa(
|
|
self,
|
|
entity_name: str,
|
|
keywords: list[str] | None = None,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> list[PaaQuestion]:
|
|
"""Search brand keywords and extract People Also Ask questions."""
|
|
if keywords is None:
|
|
keywords = [t.format(entity=entity_name) for t in self.PAA_KEYWORD_TEMPLATES]
|
|
|
|
paa_questions: list[PaaQuestion] = []
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
for keyword in keywords:
|
|
params = {"q": keyword, "hl": "en", "gl": "us"}
|
|
try:
|
|
async with session.get(
|
|
self.GOOGLE_SEARCH_URL, params=params, headers=self.HEADERS,
|
|
timeout=aiohttp.ClientTimeout(total=20),
|
|
) as resp:
|
|
if resp.status != 200:
|
|
logger.warning("Search for '%s' returned status %d", keyword, resp.status)
|
|
continue
|
|
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
|
|
# PAA box selectors
|
|
paa_selectors = [
|
|
"div[data-sgrd] div[data-q]",
|
|
"div.related-question-pair",
|
|
"div[jsname] div[data-q]",
|
|
"div.wQiwMc",
|
|
]
|
|
|
|
position = 0
|
|
for selector in paa_selectors:
|
|
elements = soup.select(selector)
|
|
for el in elements:
|
|
question_text = el.get("data-q", "") or el.get_text(strip=True)
|
|
if question_text and len(question_text) > 5:
|
|
position += 1
|
|
paa_questions.append(PaaQuestion(
|
|
question=question_text,
|
|
keyword=keyword,
|
|
position=position,
|
|
))
|
|
|
|
# Fallback: regex for PAA-like questions
|
|
if not paa_questions:
|
|
text = soup.get_text(separator="\n")
|
|
q_patterns = re.findall(
|
|
r"((?:What|How|Why|When|Where|Who|Is|Can|Does|Do|Which)\s+[^?\n]{10,80}\??)",
|
|
text,
|
|
)
|
|
for i, q in enumerate(q_patterns[:8]):
|
|
paa_questions.append(PaaQuestion(
|
|
question=q.strip(),
|
|
keyword=keyword,
|
|
position=i + 1,
|
|
))
|
|
|
|
except Exception as exc:
|
|
logger.error("PAA search failed for '%s': %s", keyword, exc)
|
|
continue
|
|
|
|
# Rate limit between searches
|
|
await asyncio.sleep(1.5)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
# Deduplicate questions
|
|
seen = set()
|
|
unique = []
|
|
for q in paa_questions:
|
|
key = q.question.lower().strip()
|
|
if key not in seen:
|
|
seen.add(key)
|
|
unique.append(q)
|
|
|
|
logger.info("Found %d unique PAA questions for '%s'", len(unique), entity_name)
|
|
return unique
|
|
|
|
# ------------------------------------------------------------------
|
|
# FAQ rich result tracking
|
|
# ------------------------------------------------------------------
|
|
|
|
async def track_faq_rich_results(
|
|
self,
|
|
url: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> list[FaqRichResult]:
|
|
"""Check pages for FAQPage schema and SERP appearance."""
|
|
faq_results: list[FaqRichResult] = []
|
|
domain = urlparse(url).netloc
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
# Fetch the page and look for FAQ schema
|
|
async with session.get(
|
|
url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=20),
|
|
) as resp:
|
|
if resp.status != 200:
|
|
logger.warning("Page %s returned status %d", url, resp.status)
|
|
return faq_results
|
|
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
|
|
# Find JSON-LD scripts with FAQPage
|
|
scripts = soup.find_all("script", type="application/ld+json")
|
|
for script in scripts:
|
|
try:
|
|
data = json.loads(script.string or "{}")
|
|
items = data if isinstance(data, list) else [data]
|
|
|
|
for item in items:
|
|
schema_type = item.get("@type", "")
|
|
if schema_type == "FAQPage" or (
|
|
isinstance(schema_type, list) and "FAQPage" in schema_type
|
|
):
|
|
questions = item.get("mainEntity", [])
|
|
faq = FaqRichResult(
|
|
url=url,
|
|
question_count=len(questions),
|
|
questions=[
|
|
q.get("name", "") for q in questions if isinstance(q, dict)
|
|
],
|
|
schema_valid=True,
|
|
)
|
|
faq_results.append(faq)
|
|
|
|
# Check for nested @graph
|
|
graph = item.get("@graph", [])
|
|
for g_item in graph:
|
|
if g_item.get("@type") == "FAQPage":
|
|
questions = g_item.get("mainEntity", [])
|
|
faq = FaqRichResult(
|
|
url=url,
|
|
question_count=len(questions),
|
|
questions=[
|
|
q.get("name", "") for q in questions if isinstance(q, dict)
|
|
],
|
|
schema_valid=True,
|
|
)
|
|
faq_results.append(faq)
|
|
|
|
except json.JSONDecodeError:
|
|
continue
|
|
|
|
# Also check for microdata FAQ markup
|
|
faq_items = soup.select("[itemtype*='FAQPage'] [itemprop='mainEntity']")
|
|
if faq_items and not faq_results:
|
|
questions = []
|
|
for item in faq_items:
|
|
q_el = item.select_one("[itemprop='name']")
|
|
if q_el:
|
|
questions.append(q_el.get_text(strip=True))
|
|
faq_results.append(FaqRichResult(
|
|
url=url,
|
|
question_count=len(questions),
|
|
questions=questions,
|
|
schema_valid=True,
|
|
))
|
|
|
|
except Exception as exc:
|
|
logger.error("FAQ tracking failed for %s: %s", url, exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
logger.info("Found %d FAQ schemas on %s", len(faq_results), url)
|
|
return faq_results
|
|
|
|
# ------------------------------------------------------------------
|
|
# Entity schema audit
|
|
# ------------------------------------------------------------------
|
|
|
|
async def audit_entity_schema(
|
|
self,
|
|
url: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> list[EntitySchema]:
|
|
"""Check Organization/Person/LocalBusiness schema on website."""
|
|
schemas: list[EntitySchema] = []
|
|
target_types = {"Organization", "Person", "LocalBusiness", "Corporation", "MedicalBusiness"}
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
async with session.get(
|
|
url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=20),
|
|
) as resp:
|
|
if resp.status != 200:
|
|
logger.warning("Page %s returned status %d", url, resp.status)
|
|
return schemas
|
|
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
|
|
scripts = soup.find_all("script", type="application/ld+json")
|
|
for script in scripts:
|
|
try:
|
|
data = json.loads(script.string or "{}")
|
|
items = data if isinstance(data, list) else [data]
|
|
|
|
# Include @graph nested items
|
|
expanded = []
|
|
for item in items:
|
|
expanded.append(item)
|
|
if "@graph" in item:
|
|
expanded.extend(item["@graph"])
|
|
|
|
for item in expanded:
|
|
item_type = item.get("@type", "")
|
|
if isinstance(item_type, list):
|
|
matching = [t for t in item_type if t in target_types]
|
|
if not matching:
|
|
continue
|
|
item_type = matching[0]
|
|
elif item_type not in target_types:
|
|
continue
|
|
|
|
same_as = item.get("sameAs", [])
|
|
if isinstance(same_as, str):
|
|
same_as = [same_as]
|
|
|
|
# Calculate completeness
|
|
base_type = item_type
|
|
if base_type == "Corporation":
|
|
base_type = "Organization"
|
|
elif base_type == "MedicalBusiness":
|
|
base_type = "LocalBusiness"
|
|
|
|
expected = self.EXPECTED_SCHEMA_PROPERTIES.get(base_type, [])
|
|
present = [k for k in expected if k in item and item[k]]
|
|
completeness = round((len(present) / len(expected)) * 100, 1) if expected else 0
|
|
|
|
# Check for issues
|
|
issues = []
|
|
if "name" not in item:
|
|
issues.append("Missing 'name' property")
|
|
if "url" not in item:
|
|
issues.append("Missing 'url' property")
|
|
if not same_as:
|
|
issues.append("No 'sameAs' links (social profiles)")
|
|
if "logo" not in item and base_type == "Organization":
|
|
issues.append("Missing 'logo' property")
|
|
if "description" not in item:
|
|
issues.append("Missing 'description' property")
|
|
|
|
schema = EntitySchema(
|
|
type=item_type,
|
|
properties={k: (str(v)[:100] if not isinstance(v, (list, dict)) else v) for k, v in item.items() if k != "@context"},
|
|
same_as_links=same_as,
|
|
completeness=completeness,
|
|
issues=issues,
|
|
)
|
|
schemas.append(schema)
|
|
|
|
except json.JSONDecodeError:
|
|
continue
|
|
|
|
except Exception as exc:
|
|
logger.error("Entity schema audit failed for %s: %s", url, exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
logger.info("Found %d entity schemas on %s", len(schemas), url)
|
|
return schemas
|
|
|
|
# ------------------------------------------------------------------
|
|
# Brand SERP analysis
|
|
# ------------------------------------------------------------------
|
|
|
|
async def analyze_brand_serp(
|
|
self,
|
|
entity_name: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> BrandSerpResult:
|
|
"""Analyze what appears in SERP for the brand name search."""
|
|
result = BrandSerpResult(query=entity_name)
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
params = {"q": entity_name, "hl": "en", "gl": "us"}
|
|
async with session.get(
|
|
self.GOOGLE_SEARCH_URL, params=params, headers=self.HEADERS,
|
|
timeout=aiohttp.ClientTimeout(total=20),
|
|
) as resp:
|
|
if resp.status != 200:
|
|
return result
|
|
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
text = soup.get_text(separator=" ", strip=True).lower()
|
|
|
|
# Detect SERP features
|
|
feature_indicators = {
|
|
"knowledge_panel": ["kp-wholepage", "knowledge-panel", "kno-"],
|
|
"sitelinks": ["sitelinks", "site-links"],
|
|
"people_also_ask": ["related-question-pair", "data-q"],
|
|
"faq_rich_result": ["faqpage", "frequently asked"],
|
|
"featured_snippet": ["featured-snippet", "data-tts"],
|
|
"image_pack": ["image-result", "img-brk"],
|
|
"video_carousel": ["video-result", "vid-"],
|
|
"twitter_carousel": ["twitter-timeline", "g-scrolling-carousel"],
|
|
"reviews": ["star-rating", "aggregate-rating"],
|
|
"local_pack": ["local-pack", "local_pack"],
|
|
}
|
|
|
|
for feature, indicators in feature_indicators.items():
|
|
for ind in indicators:
|
|
if ind in str(soup).lower():
|
|
result.features.append(feature)
|
|
break
|
|
|
|
result.knowledge_panel = "knowledge_panel" in result.features
|
|
result.sitelinks = "sitelinks" in result.features
|
|
|
|
# Count PAA questions
|
|
paa_elements = soup.select("div[data-q], div.related-question-pair")
|
|
result.paa_count = len(paa_elements)
|
|
if result.paa_count > 0 and "people_also_ask" not in result.features:
|
|
result.features.append("people_also_ask")
|
|
|
|
# Detect social profiles in results
|
|
social_domains = {
|
|
"twitter.com": "twitter", "x.com": "twitter",
|
|
"facebook.com": "facebook", "linkedin.com": "linkedin",
|
|
"youtube.com": "youtube", "instagram.com": "instagram",
|
|
"github.com": "github", "pinterest.com": "pinterest",
|
|
}
|
|
links = soup.find_all("a", href=True)
|
|
for link in links:
|
|
href = link["href"]
|
|
for domain, name in social_domains.items():
|
|
if domain in href and name not in result.social_profiles:
|
|
result.social_profiles.append(name)
|
|
|
|
# Extract top organic results
|
|
result_divs = soup.select("div.g, div[data-sokoban-container]")[:10]
|
|
for div in result_divs:
|
|
title_el = div.select_one("h3")
|
|
link_el = div.select_one("a[href]")
|
|
if title_el and link_el:
|
|
result.top_results.append({
|
|
"title": title_el.get_text(strip=True),
|
|
"url": link_el.get("href", ""),
|
|
})
|
|
|
|
except Exception as exc:
|
|
logger.error("Brand SERP analysis failed for '%s': %s", entity_name, exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
return result
|
|
|
|
# ------------------------------------------------------------------
|
|
# Social profile link validation
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_social_profile_links(
|
|
self,
|
|
same_as_links: list[str],
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> dict[str, bool]:
|
|
"""Validate sameAs URLs are accessible."""
|
|
status: dict[str, bool] = {}
|
|
if not same_as_links:
|
|
return status
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
for link in same_as_links:
|
|
try:
|
|
async with session.head(
|
|
link, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=10),
|
|
allow_redirects=True,
|
|
) as resp:
|
|
status[link] = resp.status < 400
|
|
except Exception:
|
|
status[link] = False
|
|
|
|
await asyncio.sleep(0.5)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
accessible = sum(1 for v in status.values() if v)
|
|
logger.info("Social profile links: %d/%d accessible", accessible, len(status))
|
|
return status
|
|
|
|
# ------------------------------------------------------------------
|
|
# Recommendations
|
|
# ------------------------------------------------------------------
|
|
|
|
def generate_recommendations(self, result: EntityAuditResult) -> list[str]:
|
|
"""Generate actionable entity SEO improvement recommendations."""
|
|
recs: list[str] = []
|
|
|
|
# PAA recommendations
|
|
if not result.paa_questions:
|
|
recs.append(
|
|
"브랜드 관련 People Also Ask(PAA) 질문이 감지되지 않았습니다. "
|
|
"FAQ 콘텐츠를 작성하여 PAA 노출 기회를 확보하세요."
|
|
)
|
|
elif len(result.paa_questions) < 5:
|
|
recs.append(
|
|
f"PAA 질문이 {len(result.paa_questions)}개만 감지되었습니다. "
|
|
"더 다양한 키워드에 대한 Q&A 콘텐츠를 강화하세요."
|
|
)
|
|
|
|
# FAQ schema recommendations
|
|
if not result.faq_rich_results:
|
|
recs.append(
|
|
"FAQPage schema가 감지되지 않았습니다. "
|
|
"FAQ 페이지에 FAQPage JSON-LD를 추가하여 Rich Result를 확보하세요."
|
|
)
|
|
else:
|
|
invalid = [f for f in result.faq_rich_results if not f.schema_valid]
|
|
if invalid:
|
|
recs.append(
|
|
f"{len(invalid)}개의 FAQ schema에 유효성 문제가 있습니다. "
|
|
"Google Rich Results Test로 검증하세요."
|
|
)
|
|
|
|
# Entity schema recommendations
|
|
if not result.entity_schemas:
|
|
recs.append(
|
|
"Organization/Person/LocalBusiness schema가 없습니다. "
|
|
"홈페이지에 Organization schema JSON-LD를 추가하세요."
|
|
)
|
|
else:
|
|
for schema in result.entity_schemas:
|
|
if schema.completeness < 50:
|
|
recs.append(
|
|
f"{schema.type} schema 완성도가 {schema.completeness}%입니다. "
|
|
f"누락 항목: {', '.join(schema.issues[:3])}"
|
|
)
|
|
if not schema.same_as_links:
|
|
recs.append(
|
|
f"{schema.type} schema에 sameAs 속성이 없습니다. "
|
|
"소셜 미디어 프로필 URL을 sameAs에 추가하세요."
|
|
)
|
|
|
|
# Brand SERP recommendations
|
|
serp = result.brand_serp
|
|
if not serp.knowledge_panel:
|
|
recs.append(
|
|
"브랜드 검색 시 Knowledge Panel이 표시되지 않습니다. "
|
|
"Wikipedia, Wikidata, 구조화된 데이터를 통해 엔티티 인식을 강화하세요."
|
|
)
|
|
if not serp.sitelinks:
|
|
recs.append(
|
|
"Sitelinks가 표시되지 않습니다. "
|
|
"사이트 구조와 내부 링크를 개선하세요."
|
|
)
|
|
if len(serp.social_profiles) < 3:
|
|
recs.append(
|
|
f"SERP에 소셜 프로필이 {len(serp.social_profiles)}개만 표시됩니다. "
|
|
"주요 소셜 미디어 프로필을 활성화하고 schema sameAs에 연결하세요."
|
|
)
|
|
|
|
# Social profile accessibility
|
|
broken = [url for url, ok in result.social_profile_status.items() if not ok]
|
|
if broken:
|
|
recs.append(
|
|
f"접근 불가한 소셜 프로필 링크 {len(broken)}개: "
|
|
f"{', '.join(broken[:3])}. sameAs URL을 업데이트하세요."
|
|
)
|
|
|
|
if not recs:
|
|
recs.append("Entity SEO 상태가 양호합니다. 현재 수준을 유지하세요.")
|
|
|
|
return recs
|
|
|
|
# ------------------------------------------------------------------
|
|
# Scoring
|
|
# ------------------------------------------------------------------
|
|
|
|
def compute_score(self, result: EntityAuditResult) -> float:
|
|
"""Compute overall entity SEO score (0-100)."""
|
|
score = 0.0
|
|
|
|
# PAA presence (15 points)
|
|
paa_count = len(result.paa_questions)
|
|
if paa_count >= 10:
|
|
score += 15
|
|
elif paa_count >= 5:
|
|
score += 10
|
|
elif paa_count > 0:
|
|
score += 5
|
|
|
|
# FAQ schema (15 points)
|
|
if result.faq_rich_results:
|
|
valid_count = sum(1 for f in result.faq_rich_results if f.schema_valid)
|
|
score += min(15, valid_count * 5)
|
|
|
|
# Entity schema (25 points)
|
|
if result.entity_schemas:
|
|
best_completeness = max(s.completeness for s in result.entity_schemas)
|
|
score += best_completeness * 0.25
|
|
|
|
# Brand SERP features (25 points)
|
|
serp = result.brand_serp
|
|
if serp.knowledge_panel:
|
|
score += 10
|
|
if serp.sitelinks:
|
|
score += 5
|
|
score += min(10, len(serp.features) * 2)
|
|
|
|
# Social profiles (10 points)
|
|
if result.social_profile_status:
|
|
accessible = sum(1 for v in result.social_profile_status.values() if v)
|
|
total = len(result.social_profile_status)
|
|
score += (accessible / total) * 10 if total > 0 else 0
|
|
|
|
# sameAs links (10 points)
|
|
total_same_as = sum(len(s.same_as_links) for s in result.entity_schemas)
|
|
score += min(10, total_same_as * 2)
|
|
|
|
return round(min(100, score), 1)
|
|
|
|
# ------------------------------------------------------------------
|
|
# Main orchestrator
|
|
# ------------------------------------------------------------------
|
|
|
|
async def audit(
|
|
self,
|
|
url: str,
|
|
entity_name: str,
|
|
include_paa: bool = True,
|
|
include_faq: bool = True,
|
|
) -> EntityAuditResult:
|
|
"""Orchestrate full entity SEO audit."""
|
|
result = EntityAuditResult(url=url, entity_name=entity_name)
|
|
logger.info("Starting entity audit for '%s' at %s", entity_name, url)
|
|
|
|
async with aiohttp.ClientSession() as session:
|
|
# Parallel tasks: entity schema, brand SERP, FAQ
|
|
tasks = [
|
|
self.audit_entity_schema(url, session),
|
|
self.analyze_brand_serp(entity_name, session),
|
|
]
|
|
|
|
if include_faq:
|
|
tasks.append(self.track_faq_rich_results(url, session))
|
|
|
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
|
|
# Unpack results
|
|
if not isinstance(results[0], Exception):
|
|
result.entity_schemas = results[0]
|
|
else:
|
|
logger.error("Entity schema audit failed: %s", results[0])
|
|
|
|
if not isinstance(results[1], Exception):
|
|
result.brand_serp = results[1]
|
|
else:
|
|
logger.error("Brand SERP analysis failed: %s", results[1])
|
|
|
|
if include_faq and len(results) > 2 and not isinstance(results[2], Exception):
|
|
result.faq_rich_results = results[2]
|
|
|
|
# PAA monitoring (sequential due to rate limits)
|
|
if include_paa:
|
|
result.paa_questions = await self.monitor_paa(entity_name, session=session)
|
|
|
|
# Validate social profile links from schema
|
|
all_same_as = []
|
|
for schema in result.entity_schemas:
|
|
all_same_as.extend(schema.same_as_links)
|
|
if all_same_as:
|
|
result.social_profile_status = await self.check_social_profile_links(
|
|
list(set(all_same_as)), session
|
|
)
|
|
|
|
# Compute score and recommendations
|
|
result.overall_score = self.compute_score(result)
|
|
result.recommendations = self.generate_recommendations(result)
|
|
|
|
logger.info("Entity audit complete. Score: %.1f", result.overall_score)
|
|
return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI display helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def display_result(result: EntityAuditResult) -> None:
|
|
"""Display audit result in rich tables."""
|
|
console.print()
|
|
console.print(f"[bold cyan]Entity SEO Audit: {result.entity_name}[/bold cyan]")
|
|
console.print(f"URL: {result.url} | Score: {result.overall_score}/100")
|
|
console.print()
|
|
|
|
# Entity Schema table
|
|
if result.entity_schemas:
|
|
table = Table(title="Entity Schema Markup", show_header=True)
|
|
table.add_column("Type", style="bold")
|
|
table.add_column("Completeness")
|
|
table.add_column("sameAs Links")
|
|
table.add_column("Issues")
|
|
|
|
for schema in result.entity_schemas:
|
|
issues_text = "; ".join(schema.issues[:3]) if schema.issues else "None"
|
|
table.add_row(
|
|
schema.type,
|
|
f"{schema.completeness}%",
|
|
str(len(schema.same_as_links)),
|
|
issues_text,
|
|
)
|
|
console.print(table)
|
|
else:
|
|
console.print("[red]No entity schema markup found on website![/red]")
|
|
console.print()
|
|
|
|
# Brand SERP table
|
|
serp = result.brand_serp
|
|
serp_table = Table(title="Brand SERP Analysis", show_header=True)
|
|
serp_table.add_column("Feature", style="bold")
|
|
serp_table.add_column("Status")
|
|
|
|
serp_table.add_row("Knowledge Panel", "[green]Yes[/]" if serp.knowledge_panel else "[red]No[/]")
|
|
serp_table.add_row("Sitelinks", "[green]Yes[/]" if serp.sitelinks else "[red]No[/]")
|
|
serp_table.add_row("PAA Count", str(serp.paa_count))
|
|
serp_table.add_row("SERP Features", ", ".join(serp.features) if serp.features else "None")
|
|
serp_table.add_row("Social Profiles", ", ".join(serp.social_profiles) if serp.social_profiles else "None")
|
|
|
|
console.print(serp_table)
|
|
console.print()
|
|
|
|
# PAA Questions
|
|
if result.paa_questions:
|
|
paa_table = Table(title=f"People Also Ask ({len(result.paa_questions)} questions)", show_header=True)
|
|
paa_table.add_column("#", style="dim")
|
|
paa_table.add_column("Question")
|
|
paa_table.add_column("Keyword")
|
|
|
|
for i, q in enumerate(result.paa_questions[:15], 1):
|
|
paa_table.add_row(str(i), q.question, q.keyword)
|
|
console.print(paa_table)
|
|
console.print()
|
|
|
|
# FAQ Rich Results
|
|
if result.faq_rich_results:
|
|
faq_table = Table(title="FAQ Rich Results", show_header=True)
|
|
faq_table.add_column("URL")
|
|
faq_table.add_column("Questions")
|
|
faq_table.add_column("Valid")
|
|
|
|
for faq in result.faq_rich_results:
|
|
faq_table.add_row(
|
|
faq.url[:60],
|
|
str(faq.question_count),
|
|
"[green]Yes[/]" if faq.schema_valid else "[red]No[/]",
|
|
)
|
|
console.print(faq_table)
|
|
console.print()
|
|
|
|
# Social Profile Status
|
|
if result.social_profile_status:
|
|
sp_table = Table(title="Social Profile Link Status", show_header=True)
|
|
sp_table.add_column("URL")
|
|
sp_table.add_column("Accessible")
|
|
|
|
for link, accessible in result.social_profile_status.items():
|
|
sp_table.add_row(
|
|
link[:70],
|
|
"[green]Yes[/]" if accessible else "[red]No[/]",
|
|
)
|
|
console.print(sp_table)
|
|
console.print()
|
|
|
|
# Recommendations
|
|
console.print("[bold yellow]Recommendations:[/bold yellow]")
|
|
for i, rec in enumerate(result.recommendations, 1):
|
|
console.print(f" {i}. {rec}")
|
|
console.print()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
|
|
parser = argparse.ArgumentParser(
|
|
description="Entity SEO Auditor",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
)
|
|
parser.add_argument("--url", required=True, help="Website URL to audit")
|
|
parser.add_argument("--entity", required=True, help="Entity/brand name")
|
|
parser.add_argument("--paa", action="store_true", default=True, help="Include PAA monitoring (default: True)")
|
|
parser.add_argument("--no-paa", action="store_true", help="Skip PAA monitoring")
|
|
parser.add_argument("--faq", action="store_true", default=True, help="Include FAQ tracking (default: True)")
|
|
parser.add_argument("--no-faq", action="store_true", help="Skip FAQ tracking")
|
|
parser.add_argument("--json", action="store_true", help="Output as JSON")
|
|
parser.add_argument("--output", type=str, help="Output file path")
|
|
return parser.parse_args()
|
|
|
|
|
|
async def main() -> None:
|
|
args = parse_args()
|
|
|
|
auditor = EntityAuditor()
|
|
result = await auditor.audit(
|
|
url=args.url,
|
|
entity_name=args.entity,
|
|
include_paa=not args.no_paa,
|
|
include_faq=not args.no_faq,
|
|
)
|
|
|
|
if args.json:
|
|
output = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
|
|
if args.output:
|
|
with open(args.output, "w", encoding="utf-8") as f:
|
|
f.write(output)
|
|
console.print(f"[green]Output saved to {args.output}[/green]")
|
|
else:
|
|
print(output)
|
|
else:
|
|
display_result(result)
|
|
if args.output:
|
|
with open(args.output, "w", encoding="utf-8") as f:
|
|
json.dump(result.to_dict(), f, ensure_ascii=False, indent=2)
|
|
console.print(f"[green]Output saved to {args.output}[/green]")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(main())
|