12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1047 lines
43 KiB
Python
1047 lines
43 KiB
Python
"""
|
|
E-Commerce SEO Auditor
|
|
======================
|
|
Purpose: Audit product pages, category taxonomy, duplicate content,
|
|
pagination SEO, and Korean marketplace presence.
|
|
Python: 3.10+
|
|
"""
|
|
|
|
import argparse
import asyncio
import json
import logging
import re
import sys
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Any
from urllib.parse import parse_qs, quote, urljoin, urlparse

import aiohttp
from bs4 import BeautifulSoup
from rich.console import Console
from rich.table import Table

from base_client import BaseAsyncClient, config
|
|
|
|
# Module-level logger and shared Rich console used by the CLI report helpers.
logger = logging.getLogger(__name__)
console = Console()
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data classes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@dataclass
class ProductPageIssue:
    """Single issue found on a product page.

    Serialized with dataclasses.asdict() into
    EcommerceAuditResult.issues buckets.
    """

    # Page the issue was observed on (or the domain for site-wide issues).
    url: str
    issue_type: str  # title, meta_desc, h1, image_alt, internal_link, canonical, pagination
    severity: str  # critical, high, medium, low
    # Human-readable description of what was found.
    message: str
    # Concrete remediation action for the site owner.
    recommendation: str
|
|
|
|
|
|
@dataclass
class CategoryNode:
    """A node in the category taxonomy tree."""

    url: str
    # H1 text, falling back to the last URL path segment.
    name: str
    # Number of non-empty path segments in the category URL.
    depth: int
    # Count of child-category links found on the page.
    children_count: int
    # Whether breadcrumb navigation/schema was detected on the page.
    has_breadcrumb: bool
|
|
|
|
|
|
@dataclass
class DuplicateGroup:
    """Group of duplicate or near-duplicate product URLs."""

    # URL that should carry the canonical signal for the group.
    canonical_url: str
    # Variant URLs duplicating the canonical one.
    duplicate_urls: list[str]
    reason: str  # parameter_variant, product_variant, pagination_missing_canonical
|
|
|
|
|
|
@dataclass
class MarketplacePresence:
    """Presence record for a Korean marketplace."""

    # Display name, e.g. "Naver Smart Store", "Coupang", "Gmarket", "11번가".
    platform: str
    found: bool
    # First product/store URL matched in the marketplace search results.
    url: str | None = None
    # Number of indicator-pattern matches seen in the search results HTML.
    product_count: int = 0
|
|
|
|
|
|
@dataclass
class EcommerceAuditResult:
    """Complete e-commerce SEO audit result.

    Aggregates per-page issues (bucketed by severity), category-taxonomy
    stats, duplicate-content groups, pagination findings, and Korean
    marketplace presence, plus a 0-100 score derived from the issues.
    """

    url: str
    product_pages_audited: int = 0
    # Issues bucketed by severity; each entry is an asdict()-ed ProductPageIssue.
    issues: dict[str, list[dict]] = field(default_factory=lambda: {
        "critical": [], "high": [], "medium": [], "low": []
    })
    category_structure: dict[str, Any] = field(default_factory=dict)
    duplicate_groups: list[dict] = field(default_factory=list)
    pagination_issues: list[dict] = field(default_factory=list)
    korean_marketplaces: dict[str, dict] = field(default_factory=dict)
    naver_smart_store: dict[str, Any] = field(default_factory=dict)
    # 0-100; computed by calculate_score().
    score: int = 0
    timestamp: str = ""

    def add_issue(self, issue: "ProductPageIssue") -> None:
        """Record *issue* under its severity bucket.

        setdefault makes an unexpected severity value create a new bucket
        instead of raising KeyError (the original indexed directly).
        """
        self.issues.setdefault(issue.severity, []).append(asdict(issue))

    def calculate_score(self) -> int:
        """Score 0-100 based on issue severity counts.

        Each issue subtracts a fixed penalty (critical 15, high 8,
        medium 3, low 1); the result is clamped at 0 and stored on
        self.score before being returned.
        """
        penalties = {
            "critical": 15,
            "high": 8,
            "medium": 3,
            "low": 1,
        }
        # Severities outside the table contribute no penalty.
        total_penalty = sum(
            len(items) * penalties.get(sev, 0)
            for sev, items in self.issues.items()
        )
        self.score = max(0, 100 - total_penalty)
        return self.score
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Product URL pattern helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# URL path/query fragments that typically identify a product detail page.
# Matched case-insensitively anywhere in the URL (see is_product_url).
PRODUCT_URL_PATTERNS = [
    r"/product[s]?/",
    r"/item[s]?/",
    r"/p/",
    r"/dp/",
    r"/goods/",
    r"/shop/",
    r"/detail/",
    r"\?product_id=",
    r"\?item_id=",
    r"\?goodsno=",
]

# URL path fragments that typically identify a category/listing page
# (see is_category_url).
CATEGORY_URL_PATTERNS = [
    r"/category/",
    r"/categories/",
    r"/collections?/",
    r"/c/",
    r"/department/",
    r"/browse/",
]

# Query-parameter names used by faceted navigation (filters/sorting/paging).
# Compared lowercase in get_faceted_params and detect_duplicates.
FACETED_NAV_PARAMS = [
    "color", "size", "sort", "order", "filter", "brand",
    "price_min", "price_max", "page", "per_page", "view",
    "material", "rating", "availability",
]
|
|
|
|
|
|
def is_product_url(url: str) -> bool:
    """Return True when *url* matches any known product-page pattern."""
    for pattern in PRODUCT_URL_PATTERNS:
        if re.search(pattern, url, re.IGNORECASE):
            return True
    return False
|
|
|
|
|
|
def is_category_url(url: str) -> bool:
    """Return True when *url* matches any known category-page pattern."""
    hits = (re.search(pattern, url, re.IGNORECASE) for pattern in CATEGORY_URL_PATTERNS)
    return any(match is not None for match in hits)
|
|
|
|
|
|
def get_faceted_params(url: str) -> dict[str, list[str]]:
    """Return only the query parameters that belong to faceted navigation."""
    query = urlparse(url).query
    faceted: dict[str, list[str]] = {}
    for name, values in parse_qs(query).items():
        if name.lower() in FACETED_NAV_PARAMS:
            faceted[name] = values
    return faceted
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main auditor
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class EcommerceAuditor(BaseAsyncClient):
|
|
"""E-commerce SEO auditor with product, category, and marketplace checks."""
|
|
|
|
def __init__(
    self,
    max_concurrent: int = 10,
    requests_per_second: float = 5.0,
    timeout: int = 30,
):
    """Set up concurrency limits, request timeout, and crawl headers."""
    super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
    self.timeout = aiohttp.ClientTimeout(total=timeout)
    # Identify the bot explicitly and prefer Korean content when offered.
    bot_user_agent = (
        "Mozilla/5.0 (compatible; EcommerceSEOBot/1.0; "
        "+https://ourdigital.org)"
    )
    self.headers = {
        "User-Agent": bot_user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
    }
|
|
|
|
# ------------------------------------------------------------------
|
|
# Page fetching
|
|
# ------------------------------------------------------------------
|
|
|
|
async def _fetch_page(self, session: aiohttp.ClientSession, url: str) -> tuple[str, str]:
    """Fetch a page and return (final_url, html).

    Returns the post-redirect URL and the decoded body. On any failure the
    original URL and an empty string are returned so callers can skip the page.
    """
    try:
        # ssl=False disables certificate verification — presumably deliberate
        # for auditing misconfigured shops. NOTE(review): confirm this is
        # acceptable before pointing the tool at untrusted networks.
        async with session.get(url, headers=self.headers, timeout=self.timeout,
                               allow_redirects=True, ssl=False) as resp:
            # errors="replace" keeps badly-encoded pages readable instead of raising.
            html = await resp.text(errors="replace")
            # resp.url reflects the final URL after redirects were followed.
            return str(resp.url), html
    except Exception as exc:
        # Best-effort crawl: log the failure and hand back an empty body.
        self.logger.warning(f"Failed to fetch {url}: {exc}")
        return url, ""
|
|
|
|
# ------------------------------------------------------------------
|
|
# Product page discovery via Ahrefs
|
|
# ------------------------------------------------------------------
|
|
|
|
async def get_product_pages(self, domain: str, sample: int = 50) -> list[dict]:
    """
    Discover product pages using Ahrefs pages-by-traffic.
    Falls back to sitemap crawling if Ahrefs is unavailable.
    Returns list of dicts with keys: url, traffic, keywords.
    """
    # Ahrefs integration is intended to come from the environment / MCP;
    # currently only the sitemap fallback below is implemented.
    self.logger.info(f"Discovering product pages for {domain} (sample={sample})")

    sitemap_urls = await self._fetch_sitemap_urls(domain)
    candidates = [u for u in sitemap_urls if is_product_url(u)]
    if not candidates:
        # Broader heuristic: keep any URL whose path has at least two segments.
        candidates = [
            u for u in sitemap_urls
            if len(urlparse(u).path.strip("/").split("/")) >= 2
        ]

    # Traffic/keyword counts are placeholders until Ahrefs data is wired in.
    pages = [{"url": u, "traffic": 0, "keywords": 0} for u in candidates[:sample]]

    self.logger.info(f"Found {len(pages)} product page candidates")
    return pages
|
|
|
|
async def _fetch_sitemap_urls(self, domain: str) -> list[str]:
    """Fetch URLs from the site's XML sitemap.

    Accepts a bare domain or a full URL; always requests
    <scheme>://<netloc>/sitemap.xml. Handles both a plain urlset and a
    sitemap index (following at most the first 5 child sitemaps).
    Returns [] on any failure.
    """
    urls: list[str] = []
    # Default bare domains to https before extracting the host.
    base = f"https://{domain}" if not domain.startswith("http") else domain
    parsed = urlparse(base)
    sitemap_url = f"{parsed.scheme}://{parsed.netloc}/sitemap.xml"

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(sitemap_url, headers=self.headers,
                                   timeout=self.timeout, ssl=False) as resp:
                if resp.status == 200:
                    text = await resp.text(errors="replace")
                    soup = BeautifulSoup(text, "lxml-xml")
                    # Handle sitemap index: <sitemap> entries point at child sitemaps.
                    sitemapindex = soup.find_all("sitemap")
                    if sitemapindex:
                        # Cap at 5 children to bound crawl time on huge indexes.
                        for sm in sitemapindex[:5]:
                            loc = sm.find("loc")
                            if loc:
                                child_urls = await self._parse_sitemap(session, loc.text.strip())
                                urls.extend(child_urls)
                    else:
                        # Plain urlset: collect every <url><loc> directly.
                        url_tags = soup.find_all("url")
                        for tag in url_tags:
                            loc = tag.find("loc")
                            if loc:
                                urls.append(loc.text.strip())
        except Exception as exc:
            # Best-effort: a missing/broken sitemap yields an empty list.
            self.logger.warning(f"Sitemap fetch failed: {exc}")

    return urls
|
|
|
|
async def _parse_sitemap(self, session: aiohttp.ClientSession, url: str) -> list[str]:
    """Fetch one sitemap XML document and collect every <url><loc> it lists."""
    found: list[str] = []
    try:
        async with session.get(url, headers=self.headers,
                               timeout=self.timeout, ssl=False) as resp:
            if resp.status == 200:
                body = await resp.text(errors="replace")
                doc = BeautifulSoup(body, "lxml-xml")
                loc_tags = (entry.find("loc") for entry in doc.find_all("url"))
                found.extend(loc.text.strip() for loc in loc_tags if loc)
    except Exception as exc:
        # Best-effort: a broken child sitemap must not abort the crawl.
        self.logger.warning(f"Failed to parse sitemap {url}: {exc}")
    return found
|
|
|
|
# ------------------------------------------------------------------
|
|
# Product page audit
|
|
# ------------------------------------------------------------------
|
|
|
|
async def audit_product_page(
    self,
    session: aiohttp.ClientSession,
    page_url: str,
) -> list[ProductPageIssue]:
    """Audit a single product page for SEO issues.

    Checks fetchability, <title> length, meta description, H1 count,
    product-image alt text, canonical tag, internal link count, and
    Open Graph tags. Returns all issues found (empty list when clean).
    """
    issues: list[ProductPageIssue] = []
    _, html = await self._fetch_page(session, page_url)
    if not html:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="accessibility", severity="critical",
            message="Page returned empty or could not be fetched",
            recommendation="Verify the URL is accessible and returns valid HTML.",
        ))
        return issues

    soup = BeautifulSoup(html, "lxml")

    # --- Title tag: present, and between 15 and 60 characters ---
    title_tag = soup.find("title")
    title_text = title_tag.get_text(strip=True) if title_tag else ""
    if not title_text:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="title", severity="critical",
            message="Missing <title> tag",
            recommendation="Add a unique title containing the product name (under 60 characters).",
        ))
    elif len(title_text) > 60:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="title", severity="medium",
            message=f"Title too long ({len(title_text)} chars): {title_text[:80]}...",
            recommendation="Shorten title to under 60 characters for full SERP display.",
        ))
    elif len(title_text) < 15:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="title", severity="medium",
            message=f"Title too short ({len(title_text)} chars): {title_text}",
            recommendation="Expand title with product name, key feature, and brand.",
        ))

    # --- Meta description: present and under 155 characters ---
    meta_desc_tag = soup.find("meta", attrs={"name": re.compile(r"description", re.I)})
    meta_desc = meta_desc_tag.get("content", "").strip() if meta_desc_tag else ""
    if not meta_desc:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="meta_desc", severity="high",
            message="Missing meta description",
            recommendation="Add meta description with product features and price info (under 155 chars).",
        ))
    elif len(meta_desc) > 155:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="meta_desc", severity="low",
            message=f"Meta description too long ({len(meta_desc)} chars)",
            recommendation="Trim to under 155 characters for full SERP display.",
        ))

    # --- H1 tag: exactly one expected ---
    h1_tags = soup.find_all("h1")
    if not h1_tags:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="h1", severity="high",
            message="Missing H1 tag on product page",
            recommendation="Add a single H1 with the product name.",
        ))
    elif len(h1_tags) > 1:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="h1", severity="medium",
            message=f"Multiple H1 tags found ({len(h1_tags)})",
            recommendation="Use a single H1 for the product name; use H2/H3 for subsections.",
        ))

    # --- Image alt text ---
    # Filter decorative/ops images by scanning the src AND every CSS class
    # token. BUG FIX: the previous expression only inspected the FIRST class
    # token, so an image marked e.g. class="lazy icon" was not skipped.
    skip_tokens = ("logo", "icon", "badge", "banner", "sprite", "pixel", "tracking")
    product_images = []
    for img in soup.find_all("img"):
        src = img.get("src")
        if not src:
            continue
        haystack = " ".join([src, *(img.get("class") or [])])
        if not any(token in haystack for token in skip_tokens):
            product_images.append(img)

    missing_alt = [img for img in product_images if not img.get("alt", "").strip()]
    if missing_alt:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="image_alt", severity="high",
            message=f"{len(missing_alt)} product image(s) missing alt text",
            recommendation="Add descriptive alt text with product name to all product images.",
        ))

    # Generic placeholder alt text (English and Korean) is nearly as bad
    # as missing alt text.
    generic_alt = [
        img for img in product_images
        if img.get("alt", "").strip().lower() in [
            "image", "photo", "product", "picture", "img", "product image",
            "상품 이미지", "이미지", "사진",
        ]
    ]
    if generic_alt:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="image_alt", severity="medium",
            message=f"{len(generic_alt)} image(s) with generic alt text",
            recommendation="Replace generic alt text with specific product descriptions.",
        ))

    # --- Canonical tag ---
    canonical = soup.find("link", attrs={"rel": "canonical"})
    if not canonical:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="canonical", severity="high",
            message="Missing canonical tag on product page",
            recommendation="Add <link rel='canonical'> pointing to the preferred product URL.",
        ))
    else:
        canonical_href = canonical.get("href", "").strip()
        if canonical_href and canonical_href != page_url:
            # Only flag if significantly different (not just trailing slash).
            norm_canonical = canonical_href.rstrip("/")
            norm_page = page_url.rstrip("/")
            if norm_canonical != norm_page:
                issues.append(ProductPageIssue(
                    url=page_url, issue_type="canonical", severity="medium",
                    message=f"Canonical points to different URL: {canonical_href}",
                    recommendation="Verify canonical is correct; ensure product variants point to the main product.",
                ))

    # --- Internal links: same-host anchors resolved against the page URL ---
    internal_links = []
    parsed_page = urlparse(page_url)
    for a_tag in soup.find_all("a", href=True):
        href = a_tag["href"]
        full_url = urljoin(page_url, href)
        parsed_link = urlparse(full_url)
        if parsed_link.netloc == parsed_page.netloc:
            internal_links.append(full_url)

    if len(internal_links) < 3:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="internal_link", severity="medium",
            message=f"Only {len(internal_links)} internal links found",
            recommendation="Add related product links, category breadcrumbs, and cross-sell sections.",
        ))

    # --- Open Graph / social meta ---
    og_title = soup.find("meta", attrs={"property": "og:title"})
    og_image = soup.find("meta", attrs={"property": "og:image"})
    if not og_title or not og_image:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="social_meta", severity="low",
            message="Missing Open Graph tags (og:title or og:image)",
            recommendation="Add OG tags for better social sharing of product pages.",
        ))

    return issues
|
|
|
|
# ------------------------------------------------------------------
|
|
# Category taxonomy analysis
|
|
# ------------------------------------------------------------------
|
|
|
|
async def analyze_category_taxonomy(
    self,
    session: aiohttp.ClientSession,
    base_url: str,
    max_categories: int = 50,
) -> dict[str, Any]:
    """Analyze category page structure and taxonomy depth.

    Discovers category URLs (sitemap first, homepage links as fallback),
    then for each page records URL depth, breadcrumb presence, and
    child-category link count, and flags indexable faceted-navigation URLs.

    Returns a dict with keys: categories_found, max_depth, avg_depth,
    breadcrumbs_present, breadcrumbs_missing, faceted_nav_issues, nodes.
    """
    result: dict[str, Any] = {
        "categories_found": 0,
        "max_depth": 0,
        "avg_depth": 0.0,
        "breadcrumbs_present": 0,
        "breadcrumbs_missing": 0,
        "faceted_nav_issues": [],
        "nodes": [],
    }

    # Discover category URLs from sitemap
    sitemap_urls = await self._fetch_sitemap_urls(base_url)
    category_urls = [u for u in sitemap_urls if is_category_url(u)][:max_categories]

    if not category_urls:
        # Try crawling homepage for category links
        _, html = await self._fetch_page(session, base_url)
        if html:
            soup = BeautifulSoup(html, "lxml")
            for a_tag in soup.find_all("a", href=True):
                full_url = urljoin(base_url, a_tag["href"])
                if is_category_url(full_url) and full_url not in category_urls:
                    category_urls.append(full_url)
                if len(category_urls) >= max_categories:
                    break

    depths: list[int] = []
    for cat_url in category_urls:
        _, html = await self._fetch_page(session, cat_url)
        if not html:
            # Unreachable category: skip it (it still counts toward
            # categories_found below).
            continue

        soup = BeautifulSoup(html, "lxml")
        parsed = urlparse(cat_url)
        # Depth = number of non-empty path segments (e.g. /c/shoes/running -> 3).
        path_parts = [p for p in parsed.path.strip("/").split("/") if p]
        depth = len(path_parts)
        depths.append(depth)

        # Check breadcrumb via any of: aria-label, CSS class, JSON-LD
        # BreadcrumbList, or microdata itemtype.
        has_breadcrumb = bool(
            soup.find("nav", attrs={"aria-label": re.compile(r"breadcrumb", re.I)})
            or soup.find(attrs={"class": re.compile(r"breadcrumb", re.I)})
            or soup.find("script", string=re.compile(r"BreadcrumbList", re.I))
            or soup.find("ol", attrs={"itemtype": re.compile(r"BreadcrumbList", re.I)})
        )

        if has_breadcrumb:
            result["breadcrumbs_present"] += 1
        else:
            result["breadcrumbs_missing"] += 1

        # Category name from H1 or title
        h1 = soup.find("h1")
        cat_name = h1.get_text(strip=True) if h1 else path_parts[-1] if path_parts else "unknown"

        # Count child category links
        children = 0
        for a_tag in soup.find_all("a", href=True):
            link = urljoin(cat_url, a_tag["href"])
            if is_category_url(link) and link != cat_url:
                children += 1

        node = CategoryNode(
            url=cat_url,
            name=cat_name,
            depth=depth,
            children_count=children,
            has_breadcrumb=has_breadcrumb,
        )
        result["nodes"].append(asdict(node))

        # Faceted navigation check: a filtered URL that is indexable and
        # self-canonical creates duplicate-content risk.
        faceted = get_faceted_params(cat_url)
        if faceted:
            robots_meta = soup.find("meta", attrs={"name": "robots"})
            robots_content = robots_meta.get("content", "").lower() if robots_meta else ""
            canonical = soup.find("link", attrs={"rel": "canonical"})
            canonical_href = canonical.get("href", "").strip() if canonical else ""

            # NOTE(review): exact string compare — a trailing-slash or scheme
            # difference would hide a self-canonical; confirm this is intended.
            if "noindex" not in robots_content and canonical_href == cat_url:
                result["faceted_nav_issues"].append({
                    "url": cat_url,
                    "params": list(faceted.keys()),
                    "message": "Faceted URL is indexable without canonical to base category",
                    "recommendation": (
                        "Add noindex or canonical to the non-filtered category URL "
                        "to prevent duplicate content."
                    ),
                })

    if depths:
        result["max_depth"] = max(depths)
        result["avg_depth"] = round(sum(depths) / len(depths), 1)
    result["categories_found"] = len(category_urls)

    return result
|
|
|
|
# ------------------------------------------------------------------
|
|
# Duplicate content detection
|
|
# ------------------------------------------------------------------
|
|
|
|
async def detect_duplicates(self, page_urls: list[str]) -> list[DuplicateGroup]:
    """Detect potential duplicate content from parameter variants.

    Two passes: (1) URLs sharing a base path that differ only by
    faceted-navigation query parameters; (2) URLs whose slugs differ only
    by a trailing color/size suffix (e.g. /product/123-red vs -blue).
    """
    groups: list[DuplicateGroup] = []

    # Pass 1: faceted-parameter variants grouped by scheme://host/path.
    param_variants: dict[str, list[str]] = {}
    for candidate in page_urls:
        parts = urlparse(candidate)
        stripped = f"{parts.scheme}://{parts.netloc}{parts.path}"
        has_facets = any(
            name.lower() in FACETED_NAV_PARAMS
            for name in parse_qs(parts.query)
        )
        if has_facets:
            param_variants.setdefault(stripped, []).append(candidate)

    groups.extend(
        DuplicateGroup(
            canonical_url=base,
            duplicate_urls=variants,
            reason="parameter_variant",
        )
        for base, variants in param_variants.items()
        if len(variants) > 1
    )

    # Pass 2: product variants that differ only by a color/size slug suffix.
    suffix_re = re.compile(
        r"[-_](red|blue|green|black|white|small|medium|large|xs|s|m|l|xl|xxl)$",
        re.IGNORECASE,
    )
    by_slug: dict[str, list[str]] = {}
    for candidate in page_urls:
        path = urlparse(candidate).path.rstrip("/")
        trimmed = suffix_re.sub("", path)
        if trimmed != path:
            by_slug.setdefault(trimmed, []).append(candidate)

    groups.extend(
        DuplicateGroup(
            canonical_url=variants[0],
            duplicate_urls=variants[1:],
            reason="product_variant",
        )
        for variants in by_slug.values()
        if len(variants) > 1
    )

    return groups
|
|
|
|
# ------------------------------------------------------------------
|
|
# Pagination SEO
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_pagination_seo(
    self,
    session: aiohttp.ClientSession,
    page_url: str,
) -> list[ProductPageIssue]:
    """Check pagination implementation for SEO best practices.

    Flags: missing rel=prev/next, page-N canonicals that point back to
    page 1, and noindex paginated pages that still carry product links.
    Returns [] when the page is unreachable or has no pagination links.
    """
    issues: list[ProductPageIssue] = []
    _, html = await self._fetch_page(session, page_url)
    if not html:
        return issues

    soup = BeautifulSoup(html, "lxml")

    # Look for pagination links: ?page=/?p= query params or /page/N paths.
    pagination_links = []
    for a_tag in soup.find_all("a", href=True):
        href = a_tag["href"]
        full_url = urljoin(page_url, href)
        params = parse_qs(urlparse(full_url).query)
        if "page" in params or "p" in params or re.search(r"/page/\d+", full_url):
            pagination_links.append(full_url)

    if not pagination_links:
        # Not a paginated page; nothing to check.
        return issues

    # Check rel=prev/next (deprecated by Google but still useful).
    # NOTE(review): rel is a multi-valued attribute in bs4 — confirm this
    # find() matches <link rel="prev"> markup on real pages.
    rel_prev = soup.find("link", attrs={"rel": "prev"})
    rel_next = soup.find("link", attrs={"rel": "next"})
    if not rel_prev and not rel_next:
        issues.append(ProductPageIssue(
            url=page_url, issue_type="pagination", severity="low",
            message="No rel=prev/next links on paginated page",
            recommendation=(
                "While Google no longer uses rel=prev/next, other engines may. "
                "Consider adding them for broader compatibility."
            ),
        ))

    # Check canonical on paginated pages
    canonical = soup.find("link", attrs={"rel": "canonical"})
    if canonical:
        canonical_href = canonical.get("href", "").strip()
        # If canonical points to page 1 on a non-page-1 URL, flag it
        parsed_page = urlparse(page_url)
        page_params = parse_qs(parsed_page.query)
        # Current page number from ?page= or ?p=, defaulting to "1".
        current_page_num = page_params.get("page", page_params.get("p", ["1"]))[0]

        if current_page_num != "1":
            parsed_canonical = urlparse(canonical_href)
            canon_params = parse_qs(parsed_canonical.query)
            canon_page_num = canon_params.get("page", canon_params.get("p", ["1"]))[0]

            # Flag when the canonical names page 1 explicitly OR equals the
            # query-stripped base URL (an implicit page 1).
            if canon_page_num == "1" or canonical_href.rstrip("/") == page_url.split("?")[0].rstrip("/"):
                issues.append(ProductPageIssue(
                    url=page_url, issue_type="pagination", severity="high",
                    message=f"Page {current_page_num} canonical points to page 1",
                    recommendation=(
                        "Each paginated page should self-reference its own canonical URL "
                        "to ensure all pages are indexable."
                    ),
                ))

    # Check robots noindex on filtered/sorted pages
    robots_meta = soup.find("meta", attrs={"name": "robots"})
    if robots_meta:
        content = robots_meta.get("content", "").lower()
        if "noindex" in content and pagination_links:
            issues.append(ProductPageIssue(
                url=page_url, issue_type="pagination", severity="medium",
                message="Paginated page has noindex but contains product links",
                recommendation=(
                    "Ensure products on noindex pages are still discoverable "
                    "via other indexed pages or sitemap."
                ),
            ))

    return issues
|
|
|
|
# ------------------------------------------------------------------
|
|
# Korean marketplace presence
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_korean_marketplaces(
    self,
    session: aiohttp.ClientSession,
    brand_name: str,
) -> dict[str, MarketplacePresence]:
    """Search for brand presence on Korean marketplace platforms.

    Queries the public search pages of Naver Shopping, Coupang, Gmarket,
    and 11st, then scans the response HTML for platform-specific product
    URL patterns. Returns {platform_key: MarketplacePresence}; lookups
    are best-effort and never raise.
    """
    marketplaces = {}

    # BUG FIX: URL-encode the brand so spaces/Hangul do not produce a
    # malformed query string (previously the raw name was interpolated).
    encoded_brand = quote(brand_name)

    search_configs = {
        "naver_smart_store": {
            "search_url": f"https://search.shopping.naver.com/search/all?query={encoded_brand}",
            "platform": "Naver Smart Store",
            "indicator_patterns": [r"smartstore\.naver\.com", r"brand\.naver\.com"],
        },
        "coupang": {
            "search_url": f"https://www.coupang.com/np/search?component=&q={encoded_brand}",
            "platform": "Coupang",
            "indicator_patterns": [r"coupang\.com/vp/products/"],
        },
        "gmarket": {
            "search_url": f"https://browse.gmarket.co.kr/search?keyword={encoded_brand}",
            "platform": "Gmarket",
            "indicator_patterns": [r"gmarket\.co\.kr/item/"],
        },
        "11st": {
            "search_url": f"https://search.11st.co.kr/Search.tmall?kwd={encoded_brand}",
            "platform": "11번가",
            "indicator_patterns": [r"11st\.co\.kr/products/"],
        },
    }

    for key, cfg in search_configs.items():
        presence = MarketplacePresence(platform=cfg["platform"], found=False)
        try:
            _, html = await self._fetch_page(session, cfg["search_url"])
            if html:
                for pattern in cfg["indicator_patterns"]:
                    matches = re.findall(pattern, html)
                    if matches:
                        presence.found = True
                        presence.product_count = len(matches)
                        # Extract the first full URL containing the indicator
                        # pattern (pattern is itself a regex fragment).
                        url_match = re.search(
                            rf'href=["\']?(https?://[^"\'>\s]*{pattern}[^"\'>\s]*)',
                            html,
                        )
                        if url_match:
                            presence.url = url_match.group(1)
                        break
        except Exception as exc:
            # Best-effort probe: record found=False and move on.
            self.logger.warning(f"Marketplace check failed for {key}: {exc}")

        marketplaces[key] = presence

    return marketplaces
|
|
|
|
# ------------------------------------------------------------------
|
|
# Naver Smart Store optimization
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_naver_smart_store(
    self,
    session: aiohttp.ClientSession,
    url: str,
) -> dict[str, Any]:
    """Check Naver Smart Store-specific SEO elements.

    Returns a dict with keys:
      is_smart_store -- True when the host is smartstore/brand.naver.com
      issues         -- list of {type, severity, message, recommendation}
      optimizations  -- softer suggestions without a severity
    """
    result: dict[str, Any] = {
        "is_smart_store": False,
        "issues": [],
        "optimizations": [],
    }

    parsed = urlparse(url)
    is_smart_store = "smartstore.naver.com" in parsed.netloc or "brand.naver.com" in parsed.netloc
    result["is_smart_store"] = is_smart_store

    _, html = await self._fetch_page(session, url)
    if not html:
        # Unreachable page: report only the host classification.
        return result

    soup = BeautifulSoup(html, "lxml")

    # Check Naver-specific meta tags
    naver_site_verification = soup.find("meta", attrs={"name": "naver-site-verification"})
    if not naver_site_verification:
        result["issues"].append({
            "type": "naver_verification",
            "severity": "medium",
            "message": "Missing naver-site-verification meta tag",
            "recommendation": "Add Naver Search Advisor verification tag.",
        })

    # Check for Naver Shopping structured data attributes
    product_schema = soup.find("script", string=re.compile(r'"@type"\s*:\s*"Product"'))
    if not product_schema:
        result["issues"].append({
            "type": "naver_schema",
            "severity": "high",
            "message": "Missing Product schema for Naver Shopping",
            "recommendation": "Add Product JSON-LD with Korean product names and descriptions.",
        })

    # Check Korean content optimization.
    body_text = soup.get_text(separator=" ", strip=True)
    # \uac00-\ud7af is the Hangul Syllables Unicode block.
    korean_chars = len(re.findall(r"[\uac00-\ud7af]", body_text))
    total_chars = len(body_text)
    if total_chars > 0:
        korean_ratio = korean_chars / total_chars
        # Only flag low Korean ratio on actual Smart Store hosts.
        if korean_ratio < 0.3 and is_smart_store:
            result["issues"].append({
                "type": "korean_content",
                "severity": "medium",
                "message": f"Low Korean content ratio ({korean_ratio:.0%}) for Korean marketplace",
                "recommendation": "Increase Korean language content for Naver search visibility.",
            })

    # Smart Store specific: check product detail image text.
    # Heuristic: detail/product-class images present but no textual
    # description container found.
    detail_images = soup.find_all("img", attrs={"class": re.compile(r"detail|product", re.I)})
    if detail_images and not soup.find("div", attrs={"class": re.compile(r"product.*(desc|detail|content)", re.I)}):
        result["optimizations"].append({
            "type": "detail_text",
            "message": "Product details appear to be image-only",
            "recommendation": (
                "Add HTML text product descriptions alongside images "
                "for Naver search indexing."
            ),
        })

    return result
|
|
|
|
# ------------------------------------------------------------------
|
|
# Orchestrator
|
|
# ------------------------------------------------------------------
|
|
|
|
async def audit(
    self,
    url: str,
    scope: str = "all",
    sample: int = 50,
    check_marketplaces: bool = False,
) -> EcommerceAuditResult:
    """Run the full e-commerce SEO audit.

    Args:
        url: Site URL or bare domain to audit.
        scope: "all", "products", or "categories".
        sample: Maximum number of product pages to fetch and audit.
        check_marketplaces: Also probe Korean marketplaces and Naver
            Smart Store signals.

    Returns the populated EcommerceAuditResult with its score computed.
    """
    result = EcommerceAuditResult(url=url, timestamp=datetime.now().isoformat())
    # Normalize to scheme://netloc, defaulting bare domains to https.
    parsed = urlparse(url if url.startswith("http") else f"https://{url}")
    domain = f"{parsed.scheme}://{parsed.netloc}"

    async with aiohttp.ClientSession() as session:
        # --- Product page audit ---
        if scope in ("all", "products"):
            self.logger.info("=== Product Page Audit ===")
            pages = await self.get_product_pages(domain, sample=sample)
            result.product_pages_audited = len(pages)

            for page_info in pages:
                page_issues = await self.audit_product_page(session, page_info["url"])
                for issue in page_issues:
                    result.add_issue(issue)

            # Duplicate detection over the audited pages plus the full
            # sitemap (deduplicated via set).
            all_urls = [p["url"] for p in pages]
            sitemap_urls = await self._fetch_sitemap_urls(domain)
            all_urls.extend(sitemap_urls)
            dup_groups = await self.detect_duplicates(list(set(all_urls)))
            result.duplicate_groups = [asdict(dg) for dg in dup_groups]
            for dg in dup_groups:
                result.add_issue(ProductPageIssue(
                    url=dg.canonical_url,
                    issue_type="duplicate",
                    severity="high" if dg.reason == "parameter_variant" else "medium",
                    message=f"Duplicate group ({dg.reason}): {len(dg.duplicate_urls)} variants",
                    recommendation="Implement canonical tags or parameter handling in GSC/Naver.",
                ))

            # Pagination check on category-like pages (first 10 only).
            category_like = [u for u in sitemap_urls if is_category_url(u)][:10]
            for cat_url in category_like:
                pag_issues = await self.check_pagination_seo(session, cat_url)
                result.pagination_issues.extend([asdict(i) for i in pag_issues])
                for issue in pag_issues:
                    result.add_issue(issue)

        # --- Category taxonomy ---
        if scope in ("all", "categories"):
            self.logger.info("=== Category Taxonomy Analysis ===")
            cat_result = await self.analyze_category_taxonomy(session, domain)
            result.category_structure = cat_result

            if cat_result.get("max_depth", 0) > 4:
                result.add_issue(ProductPageIssue(
                    url=domain,
                    issue_type="category_depth",
                    severity="medium",
                    message=f"Category depth exceeds 4 levels (max: {cat_result['max_depth']})",
                    recommendation="Flatten category structure to 3-4 levels for better crawlability.",
                ))

            if cat_result.get("breadcrumbs_missing", 0) > 0:
                missing = cat_result["breadcrumbs_missing"]
                total = cat_result.get("categories_found", 1)
                result.add_issue(ProductPageIssue(
                    url=domain,
                    issue_type="breadcrumb",
                    # High when more than half the sampled categories lack breadcrumbs.
                    severity="high" if missing > total * 0.5 else "medium",
                    message=f"{missing} category pages missing breadcrumb navigation",
                    recommendation="Add BreadcrumbList schema and visible breadcrumbs to all category pages.",
                ))

            for fni in cat_result.get("faceted_nav_issues", []):
                result.add_issue(ProductPageIssue(
                    url=fni["url"],
                    issue_type="faceted_nav",
                    severity="high",
                    message=fni["message"],
                    recommendation=fni["recommendation"],
                ))

        # --- Korean marketplaces ---
        if check_marketplaces:
            self.logger.info("=== Korean Marketplace Presence ===")
            # Extract brand name from site: og:site_name first, then the
            # first "|"/"-" segment of the homepage <title>.
            _, home_html = await self._fetch_page(session, domain)
            brand_name = ""
            if home_html:
                home_soup = BeautifulSoup(home_html, "lxml")
                og_site = home_soup.find("meta", attrs={"property": "og:site_name"})
                if og_site:
                    brand_name = og_site.get("content", "").strip()
                if not brand_name:
                    title_tag = home_soup.find("title")
                    if title_tag:
                        brand_name = title_tag.get_text(strip=True).split("|")[0].split("-")[0].strip()

            if brand_name:
                mp_results = await self.check_korean_marketplaces(session, brand_name)
                result.korean_marketplaces = {
                    k: asdict(v) for k, v in mp_results.items()
                }

            # Naver Smart Store check: issues are merged into the main
            # severity buckets; optimizations stay in naver_smart_store.
            naver_result = await self.check_naver_smart_store(session, domain)
            result.naver_smart_store = naver_result
            for naver_issue in naver_result.get("issues", []):
                result.add_issue(ProductPageIssue(
                    url=domain,
                    issue_type=naver_issue["type"],
                    severity=naver_issue["severity"],
                    message=naver_issue["message"],
                    recommendation=naver_issue["recommendation"],
                ))

    result.calculate_score()
    return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI output helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def print_rich_report(result: EcommerceAuditResult) -> None:
    """Render the audit result as a human-readable report on the console."""
    console.print("\n[bold cyan]E-Commerce SEO Audit Report[/bold cyan]")
    for label, value in (
        ("URL", result.url),
        ("Product Pages Audited", result.product_pages_audited),
        ("Timestamp", result.timestamp),
    ):
        console.print(f"{label}: {value}")

    # Colour the score by band: >=80 green, >=50 yellow, otherwise red.
    if result.score >= 80:
        tint = "green"
    elif result.score >= 50:
        tint = "yellow"
    else:
        tint = "red"
    console.print(f"\n[bold {tint}]Score: {result.score}/100[/bold {tint}]")

    # Per-severity issue counts (dict order fixes the display order).
    severity_colors = {"critical": "red", "high": "yellow", "medium": "cyan", "low": "dim"}
    summary = Table(title="Issues Summary")
    summary.add_column("Severity", style="bold")
    summary.add_column("Count", justify="right")
    for severity, color in severity_colors.items():
        summary.add_row(
            f"[{color}]{severity.upper()}[/{color}]",
            str(len(result.issues[severity])),
        )
    console.print(summary)

    # Detail up to ten issues for each of the two worst severities.
    for severity in ("critical", "high"):
        found = result.issues[severity]
        if not found:
            continue
        console.print(f"\n[bold red]{severity.upper()} Issues:[/bold red]")
        for item in found[:10]:
            console.print(f"  - [{item['issue_type']}] {item['message']}")
            console.print(f"    [dim]{item['recommendation']}[/dim]")

    # Category taxonomy overview (counts default to 0 when a key is absent).
    if result.category_structure:
        structure = result.category_structure
        console.print("\n[bold]Category Structure:[/bold]")
        for label, key in (
            ("Categories found", "categories_found"),
            ("Max depth", "max_depth"),
            ("Breadcrumbs present", "breadcrumbs_present"),
            ("Breadcrumbs missing", "breadcrumbs_missing"),
        ):
            console.print(f"  {label}: {structure.get(key, 0)}")

    # First five duplicate-content groups.
    if result.duplicate_groups:
        console.print(f"\n[bold]Duplicate Groups: {len(result.duplicate_groups)}[/bold]")
        for group in result.duplicate_groups[:5]:
            variants = len(group["duplicate_urls"])
            console.print(f"  [{group['reason']}] {group['canonical_url']} ({variants} variants)")

    # Korean marketplace presence, with the store URL when one was found.
    if result.korean_marketplaces:
        console.print("\n[bold]Korean Marketplace Presence:[/bold]")
        for key, entry in result.korean_marketplaces.items():
            presence = "[green]Found[/green]" if entry.get("found") else "[red]Not Found[/red]"
            console.print(f"  {entry.get('platform', key)}: {presence}")
            if entry.get("url"):
                console.print(f"      URL: {entry['url']}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _result_json(result: EcommerceAuditResult) -> str:
    """Serialize an audit result to pretty-printed JSON.

    ``default=str`` is deliberate: it stringifies non-JSON-native fields
    (e.g. the timestamp) rather than raising.
    """
    return json.dumps(asdict(result), indent=2, ensure_ascii=False, default=str)


def main() -> None:
    """CLI entry point: parse arguments, run the audit, and emit the report.

    Output modes:
      * ``--json``       print (or save with ``--output``) the JSON report;
      * otherwise        print the rich console report, and additionally
                         save JSON when ``--output`` is given.
    """
    parser = argparse.ArgumentParser(
        description="E-Commerce SEO Auditor - Product page and marketplace audit",
    )
    parser.add_argument("--url", required=True, help="Target website URL")
    parser.add_argument(
        "--scope",
        choices=["all", "products", "categories"],
        default="all",
        help="Audit scope (default: all)",
    )
    parser.add_argument(
        "--korean-marketplaces",
        action="store_true",
        help="Check Korean marketplace presence (Coupang, Gmarket, 11번가, Naver)",
    )
    parser.add_argument(
        "--sample",
        type=int,
        default=50,
        help="Number of product pages to sample (default: 50)",
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", type=str, help="Save output to file")
    args = parser.parse_args()

    auditor = EcommerceAuditor()
    result = asyncio.run(
        auditor.audit(
            url=args.url,
            scope=args.scope,
            sample=args.sample,
            check_marketplaces=args.korean_marketplaces,
        )
    )

    if args.json:
        output = _result_json(result)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(output)
            console.print(f"[green]Results saved to {args.output}[/green]")
        else:
            print(output)
    else:
        print_rich_report(result)
        # Even in rich mode, persist a machine-readable copy when asked.
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(_result_json(result))
            console.print(f"\n[green]JSON results also saved to {args.output}[/green]")

    auditor.print_stats()
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|