Files
our-claude-skills/custom-skills/99_archive/seo-audit-agent/scripts/pagespeed_client.py
Andrew Yim b69e4b6f3a refactor: Reorganize skill numbering and update documentation
Skill Numbering Changes:
- 01-03: OurDigital core (was 30-32)
- 31-32: Notion tools (was 01-02)
- 99_archive: Renamed from _archive for sorting

New Files:
- AGENTS.md: Claude Code agent routing guide
- requirements.txt for 00-claude-code-setting, 32-notion-writer, 43-jamie-youtube-manager

Documentation Updates:
- CLAUDE.md: Updated skill inventory (23 skills)
- AUDIT_REPORT.md: Current completion status (91%)
- Archived REFACTORING_PLAN.md (most tasks complete)

Removed:
- ga-agent-skills/ (moved to separate repo ~/Project/dintel-ga4-agent)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 18:42:39 +07:00

453 lines
16 KiB
Python

"""
PageSpeed Insights Client
=========================
Purpose: Get Core Web Vitals and performance data from PageSpeed Insights API
Python: 3.10+
Usage:
from pagespeed_client import PageSpeedClient
client = PageSpeedClient()
result = client.analyze("https://example.com")
"""
import argparse
import json
import logging
from dataclasses import dataclass, field
from typing import Any
import requests
from base_client import config
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
@dataclass
class CoreWebVitals:
"""Core Web Vitals metrics."""
lcp: float | None = None # Largest Contentful Paint (ms)
fid: float | None = None # First Input Delay (ms)
cls: float | None = None # Cumulative Layout Shift
inp: float | None = None # Interaction to Next Paint (ms)
ttfb: float | None = None # Time to First Byte (ms)
fcp: float | None = None # First Contentful Paint (ms)
# Assessment (GOOD, NEEDS_IMPROVEMENT, POOR)
lcp_rating: str | None = None
fid_rating: str | None = None
cls_rating: str | None = None
inp_rating: str | None = None
def to_dict(self) -> dict:
return {
"lcp": {"value": self.lcp, "rating": self.lcp_rating},
"fid": {"value": self.fid, "rating": self.fid_rating},
"cls": {"value": self.cls, "rating": self.cls_rating},
"inp": {"value": self.inp, "rating": self.inp_rating},
"ttfb": {"value": self.ttfb},
"fcp": {"value": self.fcp},
}
@dataclass
class PageSpeedResult:
    """Result of one PageSpeed Insights analysis for a URL/strategy pair."""

    url: str
    strategy: str  # "mobile" or "desktop"
    performance_score: float | None = None
    seo_score: float | None = None
    accessibility_score: float | None = None
    best_practices_score: float | None = None
    core_web_vitals: CoreWebVitals = field(default_factory=CoreWebVitals)
    opportunities: list[dict] = field(default_factory=list)
    diagnostics: list[dict] = field(default_factory=list)
    passed_audits: list[str] = field(default_factory=list)
    raw_data: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a JSON-friendly summary (opportunities capped at 10)."""
        scores = {
            "performance": self.performance_score,
            "seo": self.seo_score,
            "accessibility": self.accessibility_score,
            "best_practices": self.best_practices_score,
        }
        summary = {"url": self.url, "strategy": self.strategy, "scores": scores}
        summary["core_web_vitals"] = self.core_web_vitals.to_dict()
        summary["opportunities_count"] = len(self.opportunities)
        summary["opportunities"] = self.opportunities[:10]
        summary["diagnostics_count"] = len(self.diagnostics)
        summary["passed_audits_count"] = len(self.passed_audits)
        return summary
class PageSpeedClient:
    """Client for the PageSpeed Insights v5 API.

    Runs Lighthouse against a URL via Google's hosted API and condenses the
    response into category scores, Core Web Vitals (lab data, overridden by
    CrUX field data when present), improvement opportunities and diagnostics.
    """

    BASE_URL = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"

    # Core Web Vitals thresholds per Google's published good/poor boundaries.
    # Units: ms for timing metrics; CLS is unitless.
    THRESHOLDS = {
        "lcp": {"good": 2500, "poor": 4000},
        "fid": {"good": 100, "poor": 300},
        "cls": {"good": 0.1, "poor": 0.25},
        "inp": {"good": 200, "poor": 500},
        "ttfb": {"good": 800, "poor": 1800},
        "fcp": {"good": 1800, "poor": 3000},
    }

    # CrUX field-data categories normalized to the lab-data rating vocabulary,
    # so downstream checks against "POOR"/"NEEDS_IMPROVEMENT" (see
    # _overall_cwv_status) hold for both data sources. Unknown or missing
    # categories pass through unchanged.
    _FIELD_CATEGORY_MAP = {
        "FAST": "GOOD",
        "AVERAGE": "NEEDS_IMPROVEMENT",
        "SLOW": "POOR",
    }

    def __init__(self, api_key: str | None = None):
        """
        Initialize PageSpeed client.

        Args:
            api_key: PageSpeed API key (optional but recommended for higher
                quotas). Falls back to ``config.pagespeed_api_key``.
        """
        self.api_key = api_key or config.pagespeed_api_key
        self.session = requests.Session()

    def _rate_metric(self, metric: str, value: float | None) -> str | None:
        """Rate *value* for *metric* as GOOD / NEEDS_IMPROVEMENT / POOR.

        Returns None when the value is missing or the metric has no
        configured thresholds.
        """
        if value is None:
            return None
        thresholds = self.THRESHOLDS.get(metric)
        if not thresholds:
            return None
        if value <= thresholds["good"]:
            return "GOOD"
        if value <= thresholds["poor"]:
            return "NEEDS_IMPROVEMENT"
        return "POOR"

    @staticmethod
    def _score_pct(category: dict) -> float | None:
        """Convert a 0-1 Lighthouse category score to 0-100.

        BUG FIX: the previous ``score * 100 if score else None`` turned a
        legitimate score of 0 into None.
        """
        score = category.get("score")
        return score * 100 if score is not None else None

    def analyze(
        self,
        url: str,
        strategy: str = "mobile",
        categories: list[str] | None = None,
    ) -> PageSpeedResult:
        """
        Analyze a URL with PageSpeed Insights.

        Args:
            url: URL to analyze
            strategy: "mobile" or "desktop"
            categories: Categories to analyze (performance, seo,
                accessibility, best-practices). Defaults to all four.

        Returns:
            PageSpeedResult with scores and metrics

        Raises:
            requests.RequestException: if the API request fails.
        """
        if categories is None:
            categories = ["performance", "seo", "accessibility", "best-practices"]
        params = {
            "url": url,
            "strategy": strategy,
            "category": categories,  # requests repeats the key once per list item
        }
        if self.api_key:
            params["key"] = self.api_key
        try:
            response = self.session.get(self.BASE_URL, params=params, timeout=60)
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            logger.error(f"PageSpeed API request failed: {e}")
            raise

        result = PageSpeedResult(url=url, strategy=strategy, raw_data=data)
        lighthouse = data.get("lighthouseResult", {})
        audits = lighthouse.get("audits", {})
        self._extract_scores(result, lighthouse.get("categories", {}))
        self._extract_lab_cwv(result.core_web_vitals, audits)
        # Field data (real-user CrUX data), when available, overrides lab values.
        self._extract_field_cwv(result.core_web_vitals, data.get("loadingExperience", {}))
        self._extract_audit_findings(result, audits)
        return result

    def _extract_scores(self, result: PageSpeedResult, categories_data: dict) -> None:
        """Copy 0-100 Lighthouse category scores onto *result*."""
        if "performance" in categories_data:
            result.performance_score = self._score_pct(categories_data["performance"])
        if "seo" in categories_data:
            result.seo_score = self._score_pct(categories_data["seo"])
        if "accessibility" in categories_data:
            result.accessibility_score = self._score_pct(categories_data["accessibility"])
        if "best-practices" in categories_data:
            result.best_practices_score = self._score_pct(categories_data["best-practices"])

    def _extract_lab_cwv(self, cwv: CoreWebVitals, audits: dict) -> None:
        """Fill *cwv* from Lighthouse lab audits, rating each rated metric."""
        if "largest-contentful-paint" in audits:
            cwv.lcp = audits["largest-contentful-paint"].get("numericValue")
            cwv.lcp_rating = self._rate_metric("lcp", cwv.lcp)
        if "total-blocking-time" in audits:
            # TBT is proxy for FID in lab data
            cwv.fid = audits["total-blocking-time"].get("numericValue")
            cwv.fid_rating = self._rate_metric("fid", cwv.fid)
        if "cumulative-layout-shift" in audits:
            cwv.cls = audits["cumulative-layout-shift"].get("numericValue")
            cwv.cls_rating = self._rate_metric("cls", cwv.cls)
        if "experimental-interaction-to-next-paint" in audits:
            cwv.inp = audits["experimental-interaction-to-next-paint"].get("numericValue")
            cwv.inp_rating = self._rate_metric("inp", cwv.inp)
        if "server-response-time" in audits:
            cwv.ttfb = audits["server-response-time"].get("numericValue")
        if "first-contentful-paint" in audits:
            cwv.fcp = audits["first-contentful-paint"].get("numericValue")

    def _extract_field_cwv(self, cwv: CoreWebVitals, loading_exp: dict) -> None:
        """Override *cwv* with CrUX field data percentiles, when available.

        BUG FIX: field categories (FAST/AVERAGE/SLOW) are mapped to the lab
        rating vocabulary; previously the raw category was stored and never
        matched the POOR/NEEDS_IMPROVEMENT checks elsewhere. Also guards the
        CLS division against a missing percentile.
        """
        metrics = loading_exp.get("metrics", {})
        norm = self._FIELD_CATEGORY_MAP

        if "LARGEST_CONTENTFUL_PAINT_MS" in metrics:
            m = metrics["LARGEST_CONTENTFUL_PAINT_MS"]
            cwv.lcp = m.get("percentile")
            cwv.lcp_rating = norm.get(m.get("category"), m.get("category"))
        if "FIRST_INPUT_DELAY_MS" in metrics:
            m = metrics["FIRST_INPUT_DELAY_MS"]
            cwv.fid = m.get("percentile")
            cwv.fid_rating = norm.get(m.get("category"), m.get("category"))
        if "CUMULATIVE_LAYOUT_SHIFT_SCORE" in metrics:
            m = metrics["CUMULATIVE_LAYOUT_SHIFT_SCORE"]
            percentile = m.get("percentile")
            # CrUX reports CLS scaled x100; convert back to the unitless value.
            cwv.cls = percentile / 100 if percentile is not None else None
            cwv.cls_rating = norm.get(m.get("category"), m.get("category"))
        if "INTERACTION_TO_NEXT_PAINT" in metrics:
            m = metrics["INTERACTION_TO_NEXT_PAINT"]
            cwv.inp = m.get("percentile")
            cwv.inp_rating = norm.get(m.get("category"), m.get("category"))

    def _extract_audit_findings(self, result: PageSpeedResult, audits: dict) -> None:
        """Collect opportunities (sorted by savings), diagnostics and passed audits.

        Single pass over the audits map instead of the previous three loops;
        membership logic is unchanged.
        """
        for audit_id, audit in audits.items():
            details = audit.get("details") or {}
            detail_type = details.get("type")
            score = audit.get("score")
            if detail_type == "opportunity":
                savings = details.get("overallSavingsMs", 0)
                if savings > 0:
                    result.opportunities.append({
                        "id": audit_id,
                        "title": audit.get("title", ""),
                        "description": audit.get("description", ""),
                        "savings_ms": savings,
                        "score": audit.get("score", 0),
                    })
            elif details and score is not None and score < 1 and detail_type is not None:
                result.diagnostics.append({
                    "id": audit_id,
                    "title": audit.get("title", ""),
                    "description": audit.get("description", ""),
                    "score": score,
                })
            # Passed audits are collected independently of the branches above.
            if score == 1:
                result.passed_audits.append(audit.get("title", audit_id))
        result.opportunities.sort(key=lambda x: x["savings_ms"], reverse=True)

    def analyze_both_strategies(self, url: str) -> dict:
        """Analyze URL for both mobile and desktop, with a simple comparison."""
        mobile = self.analyze(url, strategy="mobile")
        desktop = self.analyze(url, strategy="desktop")
        return {
            "url": url,
            "mobile": mobile.to_dict(),
            "desktop": desktop.to_dict(),
            "comparison": {
                "performance_difference": (
                    (desktop.performance_score or 0) - (mobile.performance_score or 0)
                ),
                "mobile_first_issues": self._identify_mobile_issues(mobile, desktop),
            },
        }

    def _identify_mobile_issues(
        self,
        mobile: PageSpeedResult,
        desktop: PageSpeedResult,
    ) -> list[str]:
        """Identify issues that affect mobile more than desktop."""
        issues = []
        # BUG FIX: use `is not None` so a legitimate value of 0 still takes
        # part in the comparison (truthiness skipped zero scores/metrics).
        if mobile.performance_score is not None and desktop.performance_score is not None:
            if desktop.performance_score - mobile.performance_score > 20:
                issues.append("Significant performance gap between mobile and desktop")
        m_cwv = mobile.core_web_vitals
        d_cwv = desktop.core_web_vitals
        if m_cwv.lcp is not None and d_cwv.lcp is not None and m_cwv.lcp > d_cwv.lcp * 1.5:
            issues.append("LCP significantly slower on mobile")
        if m_cwv.cls is not None and d_cwv.cls is not None and m_cwv.cls > d_cwv.cls * 2:
            issues.append("Layout shift issues more severe on mobile")
        return issues

    def get_cwv_summary(self, url: str) -> dict:
        """Get a mobile-strategy summary focused on Core Web Vitals."""
        result = self.analyze(url, strategy="mobile")
        cwv = result.core_web_vitals
        # BUG FIX: `if cwv.cls` etc. rendered legitimate 0 values (e.g. a
        # perfect CLS of 0.0) as None; compare against None explicitly.
        return {
            "url": url,
            "overall_cwv_status": self._overall_cwv_status(cwv),
            "metrics": {
                "lcp": {
                    "value": f"{cwv.lcp / 1000:.2f}s" if cwv.lcp is not None else None,
                    "rating": cwv.lcp_rating,
                    "threshold": "≤ 2.5s good, > 4.0s poor",
                },
                "fid": {
                    "value": f"{cwv.fid:.0f}ms" if cwv.fid is not None else None,
                    "rating": cwv.fid_rating,
                    "threshold": "≤ 100ms good, > 300ms poor",
                },
                "cls": {
                    "value": f"{cwv.cls:.3f}" if cwv.cls is not None else None,
                    "rating": cwv.cls_rating,
                    "threshold": "≤ 0.1 good, > 0.25 poor",
                },
                "inp": {
                    "value": f"{cwv.inp:.0f}ms" if cwv.inp is not None else None,
                    "rating": cwv.inp_rating,
                    "threshold": "≤ 200ms good, > 500ms poor",
                },
            },
            "top_opportunities": result.opportunities[:5],
        }

    def _overall_cwv_status(self, cwv: CoreWebVitals) -> str:
        """Determine overall Core Web Vitals status from LCP/FID/CLS ratings.

        Worst rating wins: any POOR -> POOR, else any NEEDS_IMPROVEMENT ->
        NEEDS_IMPROVEMENT, else GOOD; UNKNOWN when no ratings are set.
        """
        ratings = [cwv.lcp_rating, cwv.fid_rating, cwv.cls_rating]
        ratings = [r for r in ratings if r]
        if not ratings:
            return "UNKNOWN"
        if any(r == "POOR" for r in ratings):
            return "POOR"
        if any(r == "NEEDS_IMPROVEMENT" for r in ratings):
            return "NEEDS_IMPROVEMENT"
        return "GOOD"

    def generate_report(self, result: PageSpeedResult) -> str:
        """Generate human-readable performance report."""

        def fmt_score(label: str, score: float | None) -> str:
            # BUG FIX: a valid score of 0 previously rendered as N/A.
            if score is None:
                return f" {label}: N/A"
            return f" {label}: {score:.0f}/100"

        lines = [
            "=" * 60,
            "PageSpeed Insights Report",
            "=" * 60,
            f"URL: {result.url}",
            f"Strategy: {result.strategy}",
            "",
            "Scores:",
            fmt_score("Performance", result.performance_score),
            fmt_score("SEO", result.seo_score),
            fmt_score("Accessibility", result.accessibility_score),
            fmt_score("Best Practices", result.best_practices_score),
            "",
            "Core Web Vitals:",
        ]
        cwv = result.core_web_vitals

        def format_metric(name: str, value: Any, rating: str | None, unit: str) -> str:
            if value is None:
                return f" {name}: N/A"
            rating_str = f" ({rating})" if rating else ""
            return f" {name}: {value}{unit}{rating_str}"

        # `is not None` everywhere below: a 0 value must still be formatted.
        lines.append(format_metric("LCP", f"{cwv.lcp / 1000:.2f}" if cwv.lcp is not None else None, cwv.lcp_rating, "s"))
        lines.append(format_metric("FID/TBT", f"{cwv.fid:.0f}" if cwv.fid is not None else None, cwv.fid_rating, "ms"))
        lines.append(format_metric("CLS", f"{cwv.cls:.3f}" if cwv.cls is not None else None, cwv.cls_rating, ""))
        lines.append(format_metric("INP", f"{cwv.inp:.0f}" if cwv.inp is not None else None, cwv.inp_rating, "ms"))
        lines.append(format_metric("TTFB", f"{cwv.ttfb:.0f}" if cwv.ttfb is not None else None, None, "ms"))
        lines.append(format_metric("FCP", f"{cwv.fcp / 1000:.2f}" if cwv.fcp is not None else None, None, "s"))
        if result.opportunities:
            lines.extend([
                "",
                f"Top Opportunities ({len(result.opportunities)} total):",
            ])
            for opp in result.opportunities[:5]:
                savings = opp["savings_ms"]
                lines.append(f" - {opp['title']}: -{savings / 1000:.1f}s potential savings")
        lines.extend(["", "=" * 60])
        return "\n".join(lines)
def main():
    """CLI entry point: analyze a URL and print or save the results.

    Modes:
        --cwv-only       Core Web Vitals summary (mobile strategy) as JSON.
        --strategy both  Mobile + desktop comparison as JSON.
        otherwise        Single-strategy analysis; JSON with --json/--output,
                         human-readable report by default.
    """
    parser = argparse.ArgumentParser(description="PageSpeed Insights Client")
    parser.add_argument("--url", "-u", required=True, help="URL to analyze")
    parser.add_argument("--strategy", "-s", default="mobile",
                        choices=["mobile", "desktop", "both"],
                        help="Analysis strategy")
    parser.add_argument("--output", "-o", help="Output file for JSON")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--cwv-only", action="store_true",
                        help="Show only Core Web Vitals summary")
    args = parser.parse_args()

    def emit(text: str) -> None:
        # Honor --output in every mode (it was previously silently ignored
        # for --cwv-only); fall back to stdout otherwise.
        if args.output:
            with open(args.output, "w") as f:
                f.write(text)
        else:
            print(text)

    client = PageSpeedClient()
    if args.cwv_only:
        emit(json.dumps(client.get_cwv_summary(args.url), indent=2))
    elif args.strategy == "both":
        emit(json.dumps(client.analyze_both_strategies(args.url), indent=2))
    else:
        result = client.analyze(args.url, strategy=args.strategy)
        if args.json or args.output:
            emit(json.dumps(result.to_dict(), indent=2))
        else:
            print(client.generate_report(result))


if __name__ == "__main__":
    main()