12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1080 lines
41 KiB
Python
1080 lines
41 KiB
Python
"""
|
|
Backlink Auditor - Backlink profile analysis and toxic link detection
|
|
=====================================================================
|
|
Purpose: Analyze backlink profiles via Ahrefs MCP, detect toxic links,
|
|
track link velocity, and map Korean platform links.
|
|
Python: 3.10+
|
|
Usage:
|
|
python backlink_auditor.py --url https://example.com --json
|
|
python backlink_auditor.py --url https://example.com --velocity --broken --json
|
|
python backlink_auditor.py --url https://example.com --korean-platforms --json
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass, field, asdict
|
|
from datetime import datetime, timedelta
|
|
from typing import Any
|
|
from urllib.parse import urlparse
|
|
|
|
import aiohttp
|
|
import pandas as pd
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
from base_client import BaseAsyncClient, config
|
|
|
|
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Module-level logger; level/handler configuration is left to the caller.
logger = logging.getLogger("backlink_auditor")
# Shared Rich console used by the output-formatting helpers.
console = Console()
|
|
|
|
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Ahrefs REST API root, used when a direct aiohttp session is available.
AHREFS_BASE = "https://api.ahrefs.com/v3"

# Korean platform domain patterns
# Keys must match KoreanPlatformStats field names (map_korean_platforms
# increments the matching counter via setattr). Values are substrings
# matched against lower-cased referring domains.
KOREAN_PLATFORM_PATTERNS: dict[str, list[str]] = {
    "naver_blog": ["blog.naver.com"],
    "naver_cafe": ["cafe.naver.com"],
    "naver_post": ["post.naver.com"],
    "naver_kin": ["kin.naver.com"],
    "tistory": ["tistory.com"],
    "brunch": ["brunch.co.kr"],
    "daum_blog": ["blog.daum.net"],
    "korean_news": [
        "chosun.com", "donga.com", "joongang.co.kr", "hani.co.kr",
        "khan.co.kr", "mk.co.kr", "mt.co.kr", "hankyung.com",
        "sedaily.com", "edaily.co.kr", "newsis.com", "yna.co.kr",
        "yonhapnews.co.kr", "news1.kr", "newspim.com", "etnews.com",
        "zdnet.co.kr", "bloter.net", "platum.kr", "besuccess.com",
    ],
    "korean_community": [
        "dcinside.com", "theqoo.net", "fmkorea.com", "ruliweb.com",
        "ppomppu.co.kr", "clien.net", "mlbpark.donga.com",
    ],
}

# Suspicious TLD patterns for toxic link detection (detect_toxic_links,
# check 1: +25 risk points on a match).
SUSPICIOUS_TLDS = {
    ".xyz", ".top", ".club", ".work", ".date", ".bid",
    ".stream", ".download", ".win", ".racing", ".review",
    ".accountant", ".loan", ".click", ".link", ".gdn",
}

# Generic anchor text patterns, lower-cased; compared against the stripped,
# lower-cased anchor in _is_generic_anchor.
GENERIC_ANCHORS = {
    "click here", "read more", "learn more", "visit", "here",
    "website", "source", "link", "this", "more info",
    "go to", "check out", "see more", "view", "details",
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Dataclasses
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@dataclass
class BacklinkEntry:
    """Single backlink record.

    Field values mirror the Ahrefs all-backlinks response keys
    (see get_all_backlinks), with defaults for missing keys.
    """
    source_url: str            # page containing the link (url_from)
    source_domain: str         # domain of the linking page (domain_from)
    dr: float = 0.0            # Domain Rating of the linking domain
    anchor: str = ""           # anchor text of the link
    link_type: str = "text"    # Ahrefs link type (e.g. text/image)
    dofollow: bool = True      # True unless the link is marked nofollow
    first_seen: str = ""       # date Ahrefs first discovered the link
    url_to: str = ""           # destination URL on the audited site
|
|
|
|
|
|
@dataclass
class AnchorDistribution:
    """Anchor text distribution breakdown.

    Counts are backlink totals per anchor category; ``total`` is the sum
    across all categories. Each ``*_pct`` property reports the category's
    share of ``total`` as a percentage (0.0 for an empty profile).
    """
    branded: int = 0        # anchors containing the brand or domain name
    exact_match: int = 0    # keyword-style anchors (2+ words, no brand)
    partial_match: int = 0  # anchors matched by no other bucket
    generic: int = 0        # "click here"-style anchors (GENERIC_ANCHORS)
    naked_url: int = 0      # raw URL anchors
    other: int = 0
    total: int = 0
    top_anchors: list[dict[str, Any]] = field(default_factory=list)

    def _pct(self, count: int) -> float:
        """Share of ``total`` as a percentage, rounded to 1 decimal place."""
        # max(total, 1) guards against ZeroDivisionError on an empty profile.
        return round(count / max(self.total, 1) * 100, 1)

    @property
    def branded_pct(self) -> float:
        return self._pct(self.branded)

    @property
    def exact_match_pct(self) -> float:
        return self._pct(self.exact_match)

    @property
    def partial_match_pct(self) -> float:
        # New convenience property: callers previously recomputed this inline.
        return self._pct(self.partial_match)

    @property
    def generic_pct(self) -> float:
        return self._pct(self.generic)

    @property
    def naked_url_pct(self) -> float:
        return self._pct(self.naked_url)
|
|
|
|
|
|
@dataclass
class ToxicLink:
    """Potentially toxic or spammy backlink.

    Produced by detect_toxic_links when a backlink's heuristic risk score
    reaches the 30-point threshold.
    """
    url: str               # linking page URL
    domain: str            # linking domain
    reason: str            # "; "-joined list of triggered heuristics
    risk_score: float = 0.0  # accumulated heuristic score, capped at 100
    dr: float = 0.0        # Domain Rating of the linking domain
    anchor: str = ""       # anchor text of the link
    first_seen: str = ""   # date Ahrefs first discovered the link
|
|
|
|
|
|
@dataclass
class KoreanPlatformStats:
    """Backlink counts from Korean platforms.

    Counter field names must match the keys of KOREAN_PLATFORM_PATTERNS;
    map_korean_platforms increments them dynamically via setattr.
    """
    naver_blog: int = 0
    naver_cafe: int = 0
    naver_post: int = 0
    naver_kin: int = 0
    tistory: int = 0
    brunch: int = 0
    daum_blog: int = 0
    korean_news: int = 0
    korean_community: int = 0
    total_korean: int = 0  # sum over all matched platforms
    # Per-domain detail dicts (domain/platform/dr/backlinks), DR-descending.
    korean_domains: list[dict[str, Any]] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class LinkVelocity:
    """Link acquisition and loss velocity.

    Filled by track_velocity from referring-domain history snapshots. Each
    window compares only two snapshots, so per-window it reports either a
    gain or a loss, never both.
    """
    new_last_7d: int = 0
    new_last_30d: int = 0
    new_last_90d: int = 0
    lost_last_7d: int = 0
    lost_last_30d: int = 0
    lost_last_90d: int = 0
    velocity_trend: str = "stable"  # "growing" / "declining" / "stable"
    # Up to the last 30 {date, referring_domains} points.
    history: list[dict[str, Any]] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class BacklinkAuditResult:
    """Complete backlink audit result.

    Populated incrementally by BacklinkAuditor.audit(). Optional sections
    (anchor_distribution, korean_platforms, link_velocity) remain None when
    their analysis was skipped or its fetch failed.
    """
    url: str                       # audited URL as given by the caller
    domain: str = ""               # netloc extracted from url
    domain_rating: float = 0.0     # Ahrefs DR
    total_backlinks: int = 0
    referring_domains: int = 0
    dofollow_backlinks: int = 0
    nofollow_backlinks: int = 0
    dofollow_ratio: float = 0.0    # dofollow / (dofollow + nofollow), 2 dp
    edu_gov_backlinks: int = 0     # .edu + .gov backlinks combined
    anchor_distribution: AnchorDistribution | None = None
    toxic_links: list[ToxicLink] = field(default_factory=list)
    toxic_link_count: int = 0
    toxic_risk_level: str = "low"  # low / medium / high / critical
    broken_backlinks: list[dict[str, Any]] = field(default_factory=list)
    korean_platforms: KoreanPlatformStats | None = None
    link_velocity: LinkVelocity | None = None
    # DR bucket label -> referring-domain count (see _dr_bucket).
    dr_distribution: dict[str, int] = field(default_factory=dict)
    # Top 20 {country, count} entries, count-descending.
    country_distribution: list[dict[str, Any]] = field(default_factory=list)
    # Each issue: {"type": error|warning, "category": ..., "message": ...}.
    issues: list[dict[str, str]] = field(default_factory=list)
    recommendations: list[str] = field(default_factory=list)
    timestamp: str = ""            # ISO-8601 audit start time
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# BacklinkAuditor
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class BacklinkAuditor(BaseAsyncClient):
    """Analyze backlink profiles using Ahrefs MCP tools."""

    def __init__(self, **kwargs):
        """Create an auditor with conservative Ahrefs rate limits.

        Remaining keyword arguments are forwarded to BaseAsyncClient.
        """
        super().__init__(max_concurrent=5, requests_per_second=2.0, **kwargs)
        # Optional aiohttp session for direct REST calls; while None,
        # _call_ahrefs returns an MCP stub payload instead of hitting HTTP.
        self.session: aiohttp.ClientSession | None = None
|
|
|
|
# -- Ahrefs MCP helper ---------------------------------------------------
|
|
|
|
async def _call_ahrefs(
|
|
self, endpoint: str, params: dict[str, Any]
|
|
) -> dict[str, Any]:
|
|
"""
|
|
Call Ahrefs API endpoint.
|
|
|
|
In MCP context this would call mcp__ahrefs__<endpoint>.
|
|
For standalone use, falls back to REST API with token.
|
|
"""
|
|
api_token = config.get_required("AHREFS_API_TOKEN") if not self.session else None
|
|
|
|
if self.session and api_token:
|
|
url = f"{AHREFS_BASE}/{endpoint}"
|
|
headers = {"Authorization": f"Bearer {api_token}"}
|
|
async with self.session.get(url, headers=headers, params=params) as resp:
|
|
resp.raise_for_status()
|
|
return await resp.json()
|
|
|
|
# Fallback: return structured empty result for MCP-only usage
|
|
logger.warning(
|
|
f"Ahrefs call to '{endpoint}' - use MCP tool "
|
|
f"mcp__ahrefs__{endpoint.replace('-', '_')} in Claude Desktop"
|
|
)
|
|
return {"endpoint": endpoint, "params": params, "data": [], "note": "mcp_stub"}
|
|
|
|
# -- Core methods --------------------------------------------------------
|
|
|
|
async def get_backlink_stats(self, url: str) -> dict[str, Any]:
|
|
"""Fetch backlink statistics overview via site-explorer-backlinks-stats."""
|
|
target = urlparse(url).netloc or url
|
|
result = await self._call_ahrefs(
|
|
"site-explorer-backlinks-stats",
|
|
{"target": target, "mode": "domain"},
|
|
)
|
|
stats = result.get("data", result) if isinstance(result, dict) else {}
|
|
return {
|
|
"total_backlinks": stats.get("live", 0),
|
|
"referring_domains": stats.get("live_refdomains", 0),
|
|
"dofollow": stats.get("live_dofollow", 0),
|
|
"nofollow": stats.get("live_nofollow", 0),
|
|
"edu": stats.get("edu", 0),
|
|
"gov": stats.get("gov", 0),
|
|
"raw": stats,
|
|
}
|
|
|
|
async def get_domain_rating(self, url: str) -> dict[str, Any]:
|
|
"""Fetch Domain Rating via site-explorer-domain-rating."""
|
|
target = urlparse(url).netloc or url
|
|
result = await self._call_ahrefs(
|
|
"site-explorer-domain-rating",
|
|
{"target": target},
|
|
)
|
|
data = result.get("data", result) if isinstance(result, dict) else {}
|
|
return {
|
|
"domain_rating": data.get("domain_rating", 0),
|
|
"ahrefs_rank": data.get("ahrefs_rank", 0),
|
|
}
|
|
|
|
async def get_referring_domains(
|
|
self, url: str, limit: int = 1000
|
|
) -> list[dict[str, Any]]:
|
|
"""List referring domains via site-explorer-referring-domains."""
|
|
target = urlparse(url).netloc or url
|
|
result = await self._call_ahrefs(
|
|
"site-explorer-referring-domains",
|
|
{"target": target, "mode": "domain", "limit": limit, "order_by": "domain_rating:desc"},
|
|
)
|
|
domains = result.get("data", result.get("refdomains", []))
|
|
if isinstance(domains, dict):
|
|
domains = domains.get("refdomains", [])
|
|
return domains if isinstance(domains, list) else []
|
|
|
|
async def get_all_backlinks(
|
|
self, url: str, limit: int = 1000
|
|
) -> list[BacklinkEntry]:
|
|
"""Fetch all backlinks via site-explorer-all-backlinks."""
|
|
target = urlparse(url).netloc or url
|
|
result = await self._call_ahrefs(
|
|
"site-explorer-all-backlinks",
|
|
{"target": target, "mode": "domain", "limit": limit, "order_by": "domain_rating:desc"},
|
|
)
|
|
raw_links = result.get("data", result.get("backlinks", []))
|
|
if isinstance(raw_links, dict):
|
|
raw_links = raw_links.get("backlinks", [])
|
|
|
|
backlinks = []
|
|
for link in (raw_links if isinstance(raw_links, list) else []):
|
|
entry = BacklinkEntry(
|
|
source_url=link.get("url_from", ""),
|
|
source_domain=link.get("domain_from", ""),
|
|
dr=link.get("domain_rating", 0),
|
|
anchor=link.get("anchor", ""),
|
|
link_type=link.get("type", "text"),
|
|
dofollow=not link.get("nofollow", False),
|
|
first_seen=link.get("first_seen", ""),
|
|
url_to=link.get("url_to", ""),
|
|
)
|
|
backlinks.append(entry)
|
|
return backlinks
|
|
|
|
    async def analyze_anchors(
        self, url: str, brand_name: str = ""
    ) -> AnchorDistribution:
        """Analyze anchor text distribution via site-explorer-anchors.

        Args:
            url: Target URL or bare domain.
            brand_name: Brand token for classification; when empty it is
                derived from the domain (first label, "www." stripped).

        Returns:
            AnchorDistribution with per-category backlink counts and the
            top 20 anchors by backlink count.
        """
        target = urlparse(url).netloc or url
        if not brand_name:
            # e.g. "www.example.com" -> "example"
            brand_name = target.replace("www.", "").split(".")[0]

        result = await self._call_ahrefs(
            "site-explorer-anchors",
            {"target": target, "mode": "domain", "limit": 500, "order_by": "backlinks:desc"},
        )
        raw_anchors = result.get("data", result.get("anchors", []))
        if isinstance(raw_anchors, dict):
            raw_anchors = raw_anchors.get("anchors", [])

        dist = AnchorDistribution()
        top_list = []

        for item in (raw_anchors if isinstance(raw_anchors, list) else []):
            anchor_text = item.get("anchor", "").strip().lower()
            # Missing backlink counts default to 1 so the anchor still counts.
            count = item.get("backlinks", 1)
            dist.total += count

            top_list.append({
                "anchor": item.get("anchor", ""),
                "backlinks": count,
                "referring_domains": item.get("refdomains", 0),
            })

            # Classify anchor text. NOTE: branch order matters — branded wins
            # over naked-URL, which wins over generic, which wins over
            # exact-match; anything left falls into partial_match.
            if self._is_branded_anchor(anchor_text, brand_name, target):
                dist.branded += count
            elif self._is_naked_url(anchor_text, target):
                dist.naked_url += count
            elif self._is_generic_anchor(anchor_text):
                dist.generic += count
            elif self._is_exact_match(anchor_text, brand_name):
                dist.exact_match += count
            else:
                dist.partial_match += count

        dist.top_anchors = sorted(top_list, key=lambda x: x["backlinks"], reverse=True)[:20]
        return dist
|
|
|
|
async def detect_toxic_links(
|
|
self, backlinks: list[BacklinkEntry]
|
|
) -> list[ToxicLink]:
|
|
"""Identify potentially toxic backlinks using heuristic scoring."""
|
|
toxic_links = []
|
|
|
|
for link in backlinks:
|
|
risk_score = 0.0
|
|
reasons = []
|
|
|
|
# Check 1: Suspicious TLD
|
|
domain_tld = self._extract_tld(link.source_domain)
|
|
if domain_tld in SUSPICIOUS_TLDS:
|
|
risk_score += 25.0
|
|
reasons.append(f"Suspicious TLD: {domain_tld}")
|
|
|
|
# Check 2: Very low DR (potential PBN or link farm)
|
|
if link.dr < 5 and link.source_domain:
|
|
risk_score += 20.0
|
|
reasons.append(f"Very low DR ({link.dr})")
|
|
elif link.dr < 10:
|
|
risk_score += 10.0
|
|
reasons.append(f"Low DR ({link.dr})")
|
|
|
|
# Check 3: Spammy anchor patterns
|
|
anchor_lower = link.anchor.lower().strip()
|
|
spam_keywords = [
|
|
"casino", "poker", "viagra", "cialis", "payday",
|
|
"loan", "buy cheap", "free download", "adult",
|
|
"gambling", "betting", "porn", "xxx", "sex",
|
|
"weight loss", "diet pill", "crypto trading",
|
|
]
|
|
for kw in spam_keywords:
|
|
if kw in anchor_lower:
|
|
risk_score += 30.0
|
|
reasons.append(f"Spam keyword in anchor: '{kw}'")
|
|
break
|
|
|
|
# Check 4: Numeric/random domain patterns (e.g., abc123xyz.com)
|
|
domain_base = link.source_domain.split(".")[0] if link.source_domain else ""
|
|
if domain_base and re.match(r"^[a-z0-9]{15,}$", domain_base):
|
|
risk_score += 15.0
|
|
reasons.append("Random/generated domain name")
|
|
|
|
# Check 5: Excessive hyphens in domain
|
|
if domain_base.count("-") >= 3:
|
|
risk_score += 15.0
|
|
reasons.append("Excessive hyphens in domain")
|
|
|
|
# Check 6: Domain contains spam-related words
|
|
domain_spam_words = [
|
|
"seo", "link", "backlink", "directory", "submit",
|
|
"free-", "cheap-", "best-", "buy-",
|
|
]
|
|
for sw in domain_spam_words:
|
|
if sw in link.source_domain.lower():
|
|
risk_score += 10.0
|
|
reasons.append(f"Spam word in domain: '{sw}'")
|
|
break
|
|
|
|
# Check 7: Very long domain name
|
|
if len(domain_base) > 30:
|
|
risk_score += 10.0
|
|
reasons.append("Unusually long domain name")
|
|
|
|
# Threshold: toxic if score >= 30
|
|
if risk_score >= 30.0:
|
|
toxic = ToxicLink(
|
|
url=link.source_url,
|
|
domain=link.source_domain,
|
|
reason="; ".join(reasons),
|
|
risk_score=min(risk_score, 100.0),
|
|
dr=link.dr,
|
|
anchor=link.anchor,
|
|
first_seen=link.first_seen,
|
|
)
|
|
toxic_links.append(toxic)
|
|
|
|
# Sort by risk score descending
|
|
toxic_links.sort(key=lambda t: t.risk_score, reverse=True)
|
|
return toxic_links
|
|
|
|
async def find_broken_backlinks(self, url: str) -> list[dict[str, Any]]:
|
|
"""Find broken backlinks for recovery via site-explorer-broken-backlinks."""
|
|
target = urlparse(url).netloc or url
|
|
result = await self._call_ahrefs(
|
|
"site-explorer-broken-backlinks",
|
|
{"target": target, "mode": "domain", "limit": 200, "order_by": "domain_rating:desc"},
|
|
)
|
|
raw = result.get("data", result.get("backlinks", []))
|
|
if isinstance(raw, dict):
|
|
raw = raw.get("backlinks", [])
|
|
|
|
broken = []
|
|
for item in (raw if isinstance(raw, list) else []):
|
|
broken.append({
|
|
"source_url": item.get("url_from", ""),
|
|
"source_domain": item.get("domain_from", ""),
|
|
"target_url": item.get("url_to", ""),
|
|
"http_code": item.get("http_code", 404),
|
|
"anchor": item.get("anchor", ""),
|
|
"dr": item.get("domain_rating", 0),
|
|
"first_seen": item.get("first_seen", ""),
|
|
})
|
|
|
|
broken.sort(key=lambda b: b.get("dr", 0), reverse=True)
|
|
return broken
|
|
|
|
    async def track_velocity(self, url: str) -> LinkVelocity:
        """Track new/lost referring domains via refdomains-history.

        Fetches up to 90 days of referring-domain history and derives
        7/30/90-day deltas against the latest snapshot. Because each window
        compares only two snapshots, a window reports either a gain or a
        loss, never both.
        """
        target = urlparse(url).netloc or url
        today = datetime.now()

        result = await self._call_ahrefs(
            "site-explorer-refdomains-history",
            {
                "target": target,
                "mode": "domain",
                "date_from": (today - timedelta(days=90)).strftime("%Y-%m-%d"),
            },
        )
        raw_history = result.get("data", result.get("history", []))
        if isinstance(raw_history, dict):
            raw_history = raw_history.get("history", [])

        velocity = LinkVelocity()
        history_points = []

        # Need at least two snapshots to compute any delta.
        if isinstance(raw_history, list) and len(raw_history) >= 2:
            for point in raw_history:
                history_points.append({
                    "date": point.get("date", ""),
                    # Field name varies between API versions.
                    "referring_domains": point.get("refdomains", point.get("referring_domains", 0)),
                })

            # Calculate velocity from history deltas
            sorted_history = sorted(history_points, key=lambda h: h["date"])

            if len(sorted_history) >= 2:
                latest = sorted_history[-1].get("referring_domains", 0)
                d7_ago = self._find_closest_point(sorted_history, 7)
                d30_ago = self._find_closest_point(sorted_history, 30)
                d90_ago = self._find_closest_point(sorted_history, 90)

                # Gains: positive delta versus each historical snapshot.
                velocity.new_last_7d = max(0, latest - d7_ago)
                velocity.new_last_30d = max(0, latest - d30_ago)
                velocity.new_last_90d = max(0, latest - d90_ago)

                # Estimate lost (simplified: if delta is negative)
                velocity.lost_last_7d = max(0, d7_ago - latest)
                velocity.lost_last_30d = max(0, d30_ago - latest)
                velocity.lost_last_90d = max(0, d90_ago - latest)

                # Determine trend: a 2x asymmetry over 30 days marks a trend.
                if velocity.new_last_30d > velocity.lost_last_30d * 2:
                    velocity.velocity_trend = "growing"
                elif velocity.lost_last_30d > velocity.new_last_30d * 2:
                    velocity.velocity_trend = "declining"
                else:
                    velocity.velocity_trend = "stable"

        velocity.history = history_points[-30:]  # Last 30 data points
        return velocity
|
|
|
|
def map_korean_platforms(
|
|
self, referring_domains: list[dict[str, Any]]
|
|
) -> KoreanPlatformStats:
|
|
"""Categorize referring domains by Korean platform."""
|
|
stats = KoreanPlatformStats()
|
|
korean_detail = []
|
|
|
|
for rd in referring_domains:
|
|
domain = rd.get("domain", rd.get("domain_from", "")).lower()
|
|
matched_platform = None
|
|
|
|
for platform, patterns in KOREAN_PLATFORM_PATTERNS.items():
|
|
for pattern in patterns:
|
|
if pattern in domain:
|
|
matched_platform = platform
|
|
break
|
|
if matched_platform:
|
|
break
|
|
|
|
if matched_platform:
|
|
current_val = getattr(stats, matched_platform, 0)
|
|
setattr(stats, matched_platform, current_val + 1)
|
|
stats.total_korean += 1
|
|
korean_detail.append({
|
|
"domain": domain,
|
|
"platform": matched_platform,
|
|
"dr": rd.get("domain_rating", rd.get("dr", 0)),
|
|
"backlinks": rd.get("backlinks", 0),
|
|
})
|
|
|
|
stats.korean_domains = sorted(
|
|
korean_detail, key=lambda d: d.get("dr", 0), reverse=True
|
|
)
|
|
return stats
|
|
|
|
    # -- Orchestration -------------------------------------------------------

    async def audit(
        self,
        url: str,
        include_velocity: bool = False,
        include_broken: bool = False,
        include_korean: bool = False,
        brand_name: str = "",
    ) -> BacklinkAuditResult:
        """Orchestrate a full backlink audit.

        Args:
            url: Target URL or bare domain to audit.
            include_velocity: Also fetch 90-day referring-domain history.
            include_broken: Also fetch broken backlinks (recovery targets).
            include_korean: Also categorize referring domains by Korean platform.
            brand_name: Brand token for anchor classification; derived from
                the domain when empty (see analyze_anchors).

        Returns:
            Populated BacklinkAuditResult including issues and recommendations.
        """
        domain = urlparse(url).netloc or url
        logger.info(f"Starting backlink audit for: {domain}")

        result = BacklinkAuditResult(
            url=url,
            domain=domain,
            timestamp=datetime.now().isoformat(),
        )

        # Phase 1: Core metrics (parallel)
        logger.info("Phase 1: Fetching core metrics...")
        stats_task = self.get_backlink_stats(url)
        dr_task = self.get_domain_rating(url)
        anchors_task = self.analyze_anchors(url, brand_name)
        backlinks_task = self.get_all_backlinks(url, limit=1000)
        refdomains_task = self.get_referring_domains(url, limit=1000)

        # return_exceptions=True lets one failed fetch degrade only its
        # section; each isinstance() check below skips results that came
        # back as exceptions.
        stats, dr_info, anchors, backlinks, refdomains = await asyncio.gather(
            stats_task, dr_task, anchors_task, backlinks_task, refdomains_task,
            return_exceptions=True,
        )

        # Process stats
        if isinstance(stats, dict):
            result.total_backlinks = stats.get("total_backlinks", 0)
            result.referring_domains = stats.get("referring_domains", 0)
            result.dofollow_backlinks = stats.get("dofollow", 0)
            result.nofollow_backlinks = stats.get("nofollow", 0)
            result.edu_gov_backlinks = stats.get("edu", 0) + stats.get("gov", 0)
            total = result.dofollow_backlinks + result.nofollow_backlinks
            # max(total, 1) avoids division by zero on an empty profile.
            result.dofollow_ratio = round(
                result.dofollow_backlinks / max(total, 1), 2
            )

        # Process DR
        if isinstance(dr_info, dict):
            result.domain_rating = dr_info.get("domain_rating", 0)

        # Process anchors
        if isinstance(anchors, AnchorDistribution):
            result.anchor_distribution = anchors

        # Phase 2: Toxic link detection
        if isinstance(backlinks, list):
            logger.info("Phase 2: Detecting toxic links...")
            result.toxic_links = await self.detect_toxic_links(backlinks)
            result.toxic_link_count = len(result.toxic_links)

            # Determine toxic risk level (same thresholds as _generate_issues)
            if result.toxic_link_count > 50:
                result.toxic_risk_level = "critical"
            elif result.toxic_link_count > 20:
                result.toxic_risk_level = "high"
            elif result.toxic_link_count > 5:
                result.toxic_risk_level = "medium"
            else:
                result.toxic_risk_level = "low"

        # Phase 3: DR distribution of referring domains
        if isinstance(refdomains, list):
            dr_dist = {"0-10": 0, "11-20": 0, "21-30": 0, "31-40": 0,
                       "41-50": 0, "51-60": 0, "61-70": 0, "71-80": 0,
                       "81-90": 0, "91-100": 0}
            country_counts: dict[str, int] = {}

            for rd in refdomains:
                dr_val = rd.get("domain_rating", rd.get("dr", 0))
                bucket = self._dr_bucket(dr_val)
                dr_dist[bucket] = dr_dist.get(bucket, 0) + 1

                country = rd.get("country", "unknown")
                country_counts[country] = country_counts.get(country, 0) + 1

            result.dr_distribution = dr_dist
            # Keep only the top 20 countries by referring-domain count.
            result.country_distribution = sorted(
                [{"country": k, "count": v} for k, v in country_counts.items()],
                key=lambda x: x["count"], reverse=True,
            )[:20]

        # Phase 4: Optional analyses
        if include_velocity:
            logger.info("Phase 4a: Tracking link velocity...")
            velocity = await self.track_velocity(url)
            if isinstance(velocity, LinkVelocity):
                result.link_velocity = velocity

        if include_broken:
            logger.info("Phase 4b: Finding broken backlinks...")
            broken = await self.find_broken_backlinks(url)
            if isinstance(broken, list):
                result.broken_backlinks = broken

        if include_korean and isinstance(refdomains, list):
            logger.info("Phase 4c: Mapping Korean platforms...")
            korean_stats = self.map_korean_platforms(refdomains)
            result.korean_platforms = korean_stats

        # Phase 5: Generate issues and recommendations
        self._generate_issues(result)
        self._generate_recommendations(result)

        logger.info(f"Backlink audit complete for {domain}")
        return result
|
|
|
|
# -- Helpers -------------------------------------------------------------
|
|
|
|
@staticmethod
|
|
def _is_branded_anchor(anchor: str, brand: str, domain: str) -> bool:
|
|
"""Check if anchor text is a brand mention."""
|
|
brand_lower = brand.lower()
|
|
domain_clean = domain.replace("www.", "").split(".")[0].lower()
|
|
return (
|
|
brand_lower in anchor
|
|
or domain_clean in anchor
|
|
or domain.lower() in anchor
|
|
)
|
|
|
|
@staticmethod
|
|
def _is_naked_url(anchor: str, domain: str) -> bool:
|
|
"""Check if anchor text is a naked URL."""
|
|
return (
|
|
anchor.startswith("http")
|
|
or anchor.startswith("www.")
|
|
or domain.lower() in anchor
|
|
and ("/" in anchor or "." in anchor)
|
|
)
|
|
|
|
@staticmethod
|
|
def _is_generic_anchor(anchor: str) -> bool:
|
|
"""Check if anchor text is generic."""
|
|
return anchor.lower().strip() in GENERIC_ANCHORS
|
|
|
|
@staticmethod
|
|
def _is_exact_match(anchor: str, brand: str) -> bool:
|
|
"""Check if anchor is exact-match keyword (not brand)."""
|
|
# Exact match if it looks like a keyword phrase (2+ words, no brand)
|
|
words = anchor.split()
|
|
return len(words) >= 2 and brand.lower() not in anchor.lower()
|
|
|
|
@staticmethod
|
|
def _extract_tld(domain: str) -> str:
|
|
"""Extract TLD from domain."""
|
|
if not domain:
|
|
return ""
|
|
parts = domain.rsplit(".", 1)
|
|
return f".{parts[-1]}" if len(parts) > 1 else ""
|
|
|
|
@staticmethod
|
|
def _dr_bucket(dr: float) -> str:
|
|
"""Map DR value to bucket range."""
|
|
if dr <= 10:
|
|
return "0-10"
|
|
elif dr <= 20:
|
|
return "11-20"
|
|
elif dr <= 30:
|
|
return "21-30"
|
|
elif dr <= 40:
|
|
return "31-40"
|
|
elif dr <= 50:
|
|
return "41-50"
|
|
elif dr <= 60:
|
|
return "51-60"
|
|
elif dr <= 70:
|
|
return "61-70"
|
|
elif dr <= 80:
|
|
return "71-80"
|
|
elif dr <= 90:
|
|
return "81-90"
|
|
else:
|
|
return "91-100"
|
|
|
|
    @staticmethod
    def _find_closest_point(history: list[dict], days_ago: int) -> int:
        """Find referring domain count closest to N days ago.

        Expects `history` sorted ascending by date. Keeps the last point
        dated on/before the target date; when every point is newer than the
        target, falls back to the oldest available point.
        """
        if not history:
            return 0
        target_date = (datetime.now() - timedelta(days=days_ago)).strftime("%Y-%m-%d")
        closest = history[0]  # fallback: oldest point
        for point in history:
            # Lexicographic comparison is valid for ISO "YYYY-MM-DD" dates.
            if point.get("date", "") <= target_date:
                closest = point
        return closest.get("referring_domains", 0)
|
|
|
|
    def _generate_issues(self, result: BacklinkAuditResult) -> None:
        """Generate audit issues based on findings.

        Mutates `result.issues` in place. Each issue dict carries
        "type" ("error"/"warning"), "category", and "message".
        """
        issues = []

        # Toxic links (thresholds mirror the risk levels set in audit())
        if result.toxic_link_count > 20:
            issues.append({
                "type": "error",
                "category": "toxic_links",
                "message": f"High toxic link count: {result.toxic_link_count} toxic backlinks detected",
            })
        elif result.toxic_link_count > 5:
            issues.append({
                "type": "warning",
                "category": "toxic_links",
                "message": f"Moderate toxic links: {result.toxic_link_count} potentially harmful backlinks",
            })

        # Low DR
        if result.domain_rating < 20:
            issues.append({
                "type": "warning",
                "category": "domain_authority",
                "message": f"Low Domain Rating ({result.domain_rating}) - weak backlink profile",
            })

        # Dofollow ratio
        if result.dofollow_ratio < 0.5:
            issues.append({
                "type": "warning",
                "category": "link_quality",
                "message": f"Low dofollow ratio ({result.dofollow_ratio:.0%}) - majority are nofollow",
            })

        # Anchor distribution issues (skipped when anchors weren't analyzed)
        if result.anchor_distribution:
            if result.anchor_distribution.exact_match_pct > 30:
                issues.append({
                    "type": "error",
                    "category": "anchor_text",
                    "message": f"Over-optimized anchors: {result.anchor_distribution.exact_match_pct}% exact match (risk of penalty)",
                })
            if result.anchor_distribution.branded_pct < 10:
                issues.append({
                    "type": "warning",
                    "category": "anchor_text",
                    "message": f"Low branded anchors ({result.anchor_distribution.branded_pct}%) - unnatural profile",
                })

        # Broken backlinks
        if len(result.broken_backlinks) > 10:
            issues.append({
                "type": "warning",
                "category": "broken_links",
                "message": f"{len(result.broken_backlinks)} broken backlinks found - recovery opportunities",
            })

        # Velocity issues
        if result.link_velocity and result.link_velocity.velocity_trend == "declining":
            issues.append({
                "type": "warning",
                "category": "velocity",
                "message": "Declining link velocity - losing more links than gaining",
            })

        result.issues = issues
|
|
|
|
    def _generate_recommendations(self, result: BacklinkAuditResult) -> None:
        """Generate actionable recommendations.

        Mutates `result.recommendations` in place; each entry is a
        human-readable action string keyed off the audit findings.
        """
        recs = []

        if result.toxic_risk_level in ("critical", "high"):
            recs.append(
                "Disavow toxic backlinks immediately using Google Disavow Tool. "
                f"Priority: {result.toxic_link_count} toxic links detected."
            )

        if result.domain_rating < 30:
            recs.append(
                "Focus on acquiring high-DR backlinks (DR 40+) from authoritative "
                "domains in your niche to improve Domain Rating."
            )

        # Looser threshold (25%) than the 30% issue flag: recommend early.
        if result.anchor_distribution and result.anchor_distribution.exact_match_pct > 25:
            recs.append(
                "Diversify anchor text profile. Reduce exact-match anchors and "
                "increase branded, generic, and naked URL anchors."
            )

        if len(result.broken_backlinks) > 5:
            # Highlight high-authority recovery targets separately.
            high_dr_broken = [b for b in result.broken_backlinks if b.get("dr", 0) > 30]
            recs.append(
                f"Reclaim {len(result.broken_backlinks)} broken backlinks "
                f"({len(high_dr_broken)} from DR 30+ domains). Set up 301 redirects "
                "or recreate content at original URLs."
            )

        if result.korean_platforms and result.korean_platforms.total_korean < 10:
            recs.append(
                "Increase presence on Korean platforms (Naver Blog, Tistory, Brunch). "
                "Korean platform links signal local relevance for Korean SERP rankings."
            )

        if result.link_velocity and result.link_velocity.velocity_trend == "declining":
            recs.append(
                "Reverse declining link velocity with active outreach: guest posting, "
                "digital PR, and content promotion campaigns."
            )

        if result.edu_gov_backlinks == 0:
            recs.append(
                "Target .edu and .gov backlinks through scholarship programs, "
                "research collaborations, or government resource pages."
            )

        result.recommendations = recs
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Output Formatting
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def format_rich_output(result: BacklinkAuditResult) -> None:
    """Print the full audit report to the console as Rich tables.

    Sections with no data (e.g. no toxic links, no issues) are
    skipped entirely.
    """

    def new_table(title: str, columns: list[tuple[str, str]],
                  header_style: str = "bold magenta") -> Table:
        # Small factory so every section builds its table the same way.
        table = Table(title=title, show_header=True, header_style=header_style)
        for heading, style in columns:
            table.add_column(heading, style=style)
        return table

    console.print(f"\n[bold cyan]Backlink Audit: {result.domain}[/bold cyan]")
    console.print(f"[dim]Timestamp: {result.timestamp}[/dim]\n")

    # --- Overview -----------------------------------------------------
    overview = new_table("Overview", [("Metric", "cyan"), ("Value", "green")])
    for label, value in (
        ("Domain Rating", str(result.domain_rating)),
        ("Total Backlinks", f"{result.total_backlinks:,}"),
        ("Referring Domains", f"{result.referring_domains:,}"),
        ("Dofollow Ratio", f"{result.dofollow_ratio:.0%}"),
        ("Edu/Gov Backlinks", str(result.edu_gov_backlinks)),
        ("Toxic Links", f"{result.toxic_link_count} ({result.toxic_risk_level})"),
    ):
        overview.add_row(label, value)
    console.print(overview)

    # --- Anchor distribution -----------------------------------------
    ad = result.anchor_distribution
    if ad:
        anchor_table = new_table(
            "\nAnchor Distribution",
            [("Type", "cyan"), ("Count", "green"), ("Percentage", "yellow")],
        )
        # Only partial-match lacks a precomputed pct field; derive it here.
        partial_pct = ad.partial_match / max(ad.total, 1) * 100
        for anchor_row in (
            ("Branded", str(ad.branded), f"{ad.branded_pct}%"),
            ("Exact Match", str(ad.exact_match), f"{ad.exact_match_pct}%"),
            ("Partial Match", str(ad.partial_match), f"{partial_pct:.1f}%"),
            ("Generic", str(ad.generic), f"{ad.generic_pct}%"),
            ("Naked URL", str(ad.naked_url), f"{ad.naked_url_pct}%"),
        ):
            anchor_table.add_row(*anchor_row)
        console.print(anchor_table)

    # --- DR distribution ---------------------------------------------
    if result.dr_distribution:
        dr_table = new_table(
            "\nDR Distribution (Referring Domains)",
            [("DR Range", "cyan"), ("Count", "green")],
        )
        for dr_bucket, bucket_count in result.dr_distribution.items():
            dr_table.add_row(dr_bucket, str(bucket_count))
        console.print(dr_table)

    # --- Toxic links (top 10) ----------------------------------------
    if result.toxic_links:
        toxic_table = new_table(
            f"\nToxic Links (Top 10 of {len(result.toxic_links)})",
            [("Domain", "red"), ("Risk", "yellow"), ("Reason", "dim")],
            header_style="bold red",
        )
        for toxic in result.toxic_links[:10]:
            toxic_table.add_row(toxic.domain, f"{toxic.risk_score:.0f}", toxic.reason[:60])
        console.print(toxic_table)

    # --- Korean platform links ---------------------------------------
    kp = result.korean_platforms
    if kp and kp.total_korean > 0:
        kr_table = new_table(
            "\nKorean Platform Links",
            [("Platform", "cyan"), ("Count", "green")],
        )
        platform_counts = (
            ("Naver Blog", kp.naver_blog), ("Naver Cafe", kp.naver_cafe),
            ("Naver Post", kp.naver_post), ("Tistory", kp.tistory),
            ("Brunch", kp.brunch), ("Daum Blog", kp.daum_blog),
            ("Korean News", kp.korean_news), ("Korean Community", kp.korean_community),
        )
        for platform_name, link_count in platform_counts:
            if link_count > 0:
                kr_table.add_row(platform_name, str(link_count))
        kr_table.add_row("[bold]Total Korean[/bold]", f"[bold]{kp.total_korean}[/bold]")
        console.print(kr_table)

    # --- Link velocity ------------------------------------------------
    lv = result.link_velocity
    if lv:
        vel_table = new_table(
            "\nLink Velocity",
            [("Period", "cyan"), ("New", "green"), ("Lost", "red")],
        )
        vel_table.add_row("Last 7 days", str(lv.new_last_7d), str(lv.lost_last_7d))
        vel_table.add_row("Last 30 days", str(lv.new_last_30d), str(lv.lost_last_30d))
        vel_table.add_row("Last 90 days", str(lv.new_last_90d), str(lv.lost_last_90d))
        vel_table.add_row("Trend", lv.velocity_trend, "")
        console.print(vel_table)

    # --- Broken backlinks --------------------------------------------
    if result.broken_backlinks:
        broken_table = new_table(
            f"\nBroken Backlinks (Top 10 of {len(result.broken_backlinks)})",
            [("Source Domain", "cyan"), ("DR", "green"), ("Target URL", "dim")],
        )
        for broken in result.broken_backlinks[:10]:
            broken_table.add_row(
                broken.get("source_domain", ""),
                str(broken.get("dr", 0)),
                broken.get("target_url", "")[:50],
            )
        console.print(broken_table)

    # --- Issues -------------------------------------------------------
    if result.issues:
        console.print("\n[bold red]Issues Found:[/bold red]")
        for issue in result.issues:
            icon = "[red]ERROR[/red]" if issue["type"] == "error" else "[yellow]WARN[/yellow]"
            console.print(f"  {icon} [{issue['category']}] {issue['message']}")

    # --- Recommendations ---------------------------------------------
    if result.recommendations:
        console.print("\n[bold green]Recommendations:[/bold green]")
        for idx, rec in enumerate(result.recommendations, 1):
            console.print(f"  {idx}. {rec}")

    console.print()
|
|
|
|
|
|
def result_to_dict(result: BacklinkAuditResult) -> dict[str, Any]:
    """Flatten the audit result into a JSON-serializable dictionary.

    Optional dataclass sections (anchor distribution, Korean platforms,
    link velocity) become plain dicts, or None when absent.
    """

    def dc_or_none(section: Any) -> dict[str, Any] | None:
        # Convert an optional dataclass section; falsy sections map to None.
        return asdict(section) if section else None

    backlink_stats = {
        "total_backlinks": result.total_backlinks,
        "referring_domains": result.referring_domains,
        "dofollow_backlinks": result.dofollow_backlinks,
        "nofollow_backlinks": result.nofollow_backlinks,
        "dofollow_ratio": result.dofollow_ratio,
        "edu_gov_backlinks": result.edu_gov_backlinks,
    }
    return {
        "url": result.url,
        "domain": result.domain,
        "domain_rating": result.domain_rating,
        "backlink_stats": backlink_stats,
        "anchor_distribution": dc_or_none(result.anchor_distribution),
        "dr_distribution": result.dr_distribution,
        "country_distribution": result.country_distribution,
        "toxic_links": [asdict(toxic) for toxic in result.toxic_links],
        "toxic_link_count": result.toxic_link_count,
        "toxic_risk_level": result.toxic_risk_level,
        "broken_backlinks": result.broken_backlinks,
        "korean_platforms": dc_or_none(result.korean_platforms),
        "link_velocity": dc_or_none(result.link_velocity),
        "issues": result.issues,
        "recommendations": result.recommendations,
        "timestamp": result.timestamp,
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Build the CLI parser and parse sys.argv.

    Returns the parsed namespace; ``--url`` is the only required option.
    """
    usage_examples = """
Examples:
  python backlink_auditor.py --url https://example.com --json
  python backlink_auditor.py --url https://example.com --velocity --broken --json
  python backlink_auditor.py --url https://example.com --korean-platforms --json
  python backlink_auditor.py --url https://example.com --velocity --broken --korean-platforms --output report.json
"""
    cli = argparse.ArgumentParser(
        description="Backlink Auditor - Analyze backlink profiles and detect toxic links",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument("--url", required=True, help="Target URL or domain to audit")
    cli.add_argument("--brand", default="", help="Brand name for anchor classification")
    cli.add_argument("--velocity", action="store_true", help="Include link velocity tracking")
    cli.add_argument("--broken", action="store_true", help="Include broken backlink analysis")
    cli.add_argument("--korean-platforms", action="store_true", help="Include Korean platform link mapping")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    cli.add_argument("--output", "-o", help="Save output to file")
    return cli.parse_args()
|
|
|
|
|
|
async def main() -> None:
    """Async CLI entry point.

    Parses arguments, runs the audit, then renders either JSON (to
    stdout and/or a file) or Rich console output. Exits with status 1
    on interruption or failure; with --json, a failure also prints an
    ``{"error": ...}`` document before exiting.
    """
    args = parse_args()
    auditor = BacklinkAuditor()

    def emit(result: BacklinkAuditResult) -> None:
        # Serialize once, then reuse for both the file and stdout targets.
        if args.json or args.output:
            payload = json.dumps(result_to_dict(result), indent=2, ensure_ascii=False)
            if args.output:
                with open(args.output, "w", encoding="utf-8") as fh:
                    fh.write(payload)
                logger.info(f"Report saved to {args.output}")
            if args.json:
                print(payload)
        else:
            format_rich_output(result)

    try:
        audit_result = await auditor.audit(
            url=args.url,
            include_velocity=args.velocity,
            include_broken=args.broken,
            include_korean=args.korean_platforms,
            brand_name=args.brand,
        )
        emit(audit_result)
        auditor.print_stats()
    except KeyboardInterrupt:
        logger.warning("Audit interrupted by user")
        sys.exit(1)
    except Exception as exc:
        logger.error(f"Audit failed: {exc}")
        if args.json:
            print(json.dumps({"error": str(exc)}, indent=2))
        sys.exit(1)
|
|
|
|
|
|
# Script entry point: drive the async main() on the default event loop.
if __name__ == "__main__":
    asyncio.run(main())
|