Files
our-claude-skills/custom-skills/99_archive/seo-audit-agent/scripts/pagespeed_client.py
Andrew Yim b69e4b6f3a refactor: Reorganize skill numbering and update documentation
Skill Numbering Changes:
- 01-03: OurDigital core (was 30-32)
- 31-32: Notion tools (was 01-02)
- 99_archive: Renamed from _archive for sorting

New Files:
- AGENTS.md: Claude Code agent routing guide
- requirements.txt for 00-claude-code-setting, 32-notion-writer, 43-jamie-youtube-manager

Documentation Updates:
- CLAUDE.md: Updated skill inventory (23 skills)
- AUDIT_REPORT.md: Current completion status (91%)
- Archived REFACTORING_PLAN.md (most tasks complete)

Removed:
- ga-agent-skills/ (moved to separate repo ~/Project/dintel-ga4-agent)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 18:42:39 +07:00

453 lines
16 KiB
Python

"""
PageSpeed Insights Client
=========================
Purpose: Get Core Web Vitals and performance data from PageSpeed Insights API
Python: 3.10+
Usage:
from pagespeed_client import PageSpeedClient
client = PageSpeedClient()
result = client.analyze("https://example.com")
"""
import argparse
import json
import logging
from dataclasses import dataclass, field
from typing import Any
import requests
from base_client import config
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
@dataclass
class CoreWebVitals:
"""Core Web Vitals metrics."""
lcp: float | None = None # Largest Contentful Paint (ms)
fid: float | None = None # First Input Delay (ms)
cls: float | None = None # Cumulative Layout Shift
inp: float | None = None # Interaction to Next Paint (ms)
ttfb: float | None = None # Time to First Byte (ms)
fcp: float | None = None # First Contentful Paint (ms)
# Assessment (GOOD, NEEDS_IMPROVEMENT, POOR)
lcp_rating: str | None = None
fid_rating: str | None = None
cls_rating: str | None = None
inp_rating: str | None = None
def to_dict(self) -> dict:
return {
"lcp": {"value": self.lcp, "rating": self.lcp_rating},
"fid": {"value": self.fid, "rating": self.fid_rating},
"cls": {"value": self.cls, "rating": self.cls_rating},
"inp": {"value": self.inp, "rating": self.inp_rating},
"ttfb": {"value": self.ttfb},
"fcp": {"value": self.fcp},
}
@dataclass
class PageSpeedResult:
    """Result of one PageSpeed Insights analysis for a URL/strategy pair."""

    url: str
    strategy: str  # "mobile" or "desktop"
    performance_score: float | None = None
    seo_score: float | None = None
    accessibility_score: float | None = None
    best_practices_score: float | None = None
    core_web_vitals: CoreWebVitals = field(default_factory=CoreWebVitals)
    opportunities: list[dict] = field(default_factory=list)
    diagnostics: list[dict] = field(default_factory=list)
    passed_audits: list[str] = field(default_factory=list)
    raw_data: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a JSON-friendly summary (opportunities capped at 10)."""
        scores = {
            "performance": self.performance_score,
            "seo": self.seo_score,
            "accessibility": self.accessibility_score,
            "best_practices": self.best_practices_score,
        }
        summary = {"url": self.url, "strategy": self.strategy, "scores": scores}
        summary["core_web_vitals"] = self.core_web_vitals.to_dict()
        summary["opportunities_count"] = len(self.opportunities)
        summary["opportunities"] = self.opportunities[:10]
        summary["diagnostics_count"] = len(self.diagnostics)
        summary["passed_audits_count"] = len(self.passed_audits)
        return summary
class PageSpeedClient:
    """Client for the PageSpeed Insights v5 API.

    Runs Lighthouse against a URL via Google's hosted API and condenses the
    response into category scores, Core Web Vitals (lab data, overridden by
    CrUX field data when present), improvement opportunities and diagnostics.
    """

    BASE_URL = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"

    # Core Web Vitals thresholds per Google's published good/poor boundaries.
    # Units: ms for timing metrics; CLS is unitless.
    THRESHOLDS = {
        "lcp": {"good": 2500, "poor": 4000},
        "fid": {"good": 100, "poor": 300},
        "cls": {"good": 0.1, "poor": 0.25},
        "inp": {"good": 200, "poor": 500},
        "ttfb": {"good": 800, "poor": 1800},
        "fcp": {"good": 1800, "poor": 3000},
    }

    # CrUX field-data categories normalized to the lab-data rating vocabulary,
    # so downstream checks against "POOR"/"NEEDS_IMPROVEMENT" (see
    # _overall_cwv_status) hold for both data sources. Unknown or missing
    # categories pass through unchanged.
    _FIELD_CATEGORY_MAP = {
        "FAST": "GOOD",
        "AVERAGE": "NEEDS_IMPROVEMENT",
        "SLOW": "POOR",
    }

    def __init__(self, api_key: str | None = None):
        """
        Initialize PageSpeed client.

        Args:
            api_key: PageSpeed API key (optional but recommended for higher
                quotas). Falls back to ``config.pagespeed_api_key``.
        """
        self.api_key = api_key or config.pagespeed_api_key
        self.session = requests.Session()

    def _rate_metric(self, metric: str, value: float | None) -> str | None:
        """Rate *value* for *metric* as GOOD / NEEDS_IMPROVEMENT / POOR.

        Returns None when the value is missing or the metric has no
        configured thresholds.
        """
        if value is None:
            return None
        thresholds = self.THRESHOLDS.get(metric)
        if not thresholds:
            return None
        if value <= thresholds["good"]:
            return "GOOD"
        if value <= thresholds["poor"]:
            return "NEEDS_IMPROVEMENT"
        return "POOR"

    @staticmethod
    def _score_pct(category: dict) -> float | None:
        """Convert a 0-1 Lighthouse category score to 0-100.

        BUG FIX: the previous ``score * 100 if score else None`` turned a
        legitimate score of 0 into None.
        """
        score = category.get("score")
        return score * 100 if score is not None else None

    def analyze(
        self,
        url: str,
        strategy: str = "mobile",
        categories: list[str] | None = None,
    ) -> PageSpeedResult:
        """
        Analyze a URL with PageSpeed Insights.

        Args:
            url: URL to analyze
            strategy: "mobile" or "desktop"
            categories: Categories to analyze (performance, seo,
                accessibility, best-practices). Defaults to all four.

        Returns:
            PageSpeedResult with scores and metrics

        Raises:
            requests.RequestException: if the API request fails.
        """
        if categories is None:
            categories = ["performance", "seo", "accessibility", "best-practices"]
        params = {
            "url": url,
            "strategy": strategy,
            "category": categories,  # requests repeats the key once per list item
        }
        if self.api_key:
            params["key"] = self.api_key
        try:
            response = self.session.get(self.BASE_URL, params=params, timeout=60)
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            logger.error(f"PageSpeed API request failed: {e}")
            raise

        result = PageSpeedResult(url=url, strategy=strategy, raw_data=data)
        lighthouse = data.get("lighthouseResult", {})
        audits = lighthouse.get("audits", {})
        self._extract_scores(result, lighthouse.get("categories", {}))
        self._extract_lab_cwv(result.core_web_vitals, audits)
        # Field data (real-user CrUX data), when available, overrides lab values.
        self._extract_field_cwv(result.core_web_vitals, data.get("loadingExperience", {}))
        self._extract_audit_findings(result, audits)
        return result

    def _extract_scores(self, result: PageSpeedResult, categories_data: dict) -> None:
        """Copy 0-100 Lighthouse category scores onto *result*."""
        if "performance" in categories_data:
            result.performance_score = self._score_pct(categories_data["performance"])
        if "seo" in categories_data:
            result.seo_score = self._score_pct(categories_data["seo"])
        if "accessibility" in categories_data:
            result.accessibility_score = self._score_pct(categories_data["accessibility"])
        if "best-practices" in categories_data:
            result.best_practices_score = self._score_pct(categories_data["best-practices"])

    def _extract_lab_cwv(self, cwv: CoreWebVitals, audits: dict) -> None:
        """Fill *cwv* from Lighthouse lab audits, rating each rated metric."""
        if "largest-contentful-paint" in audits:
            cwv.lcp = audits["largest-contentful-paint"].get("numericValue")
            cwv.lcp_rating = self._rate_metric("lcp", cwv.lcp)
        if "total-blocking-time" in audits:
            # TBT is proxy for FID in lab data
            cwv.fid = audits["total-blocking-time"].get("numericValue")
            cwv.fid_rating = self._rate_metric("fid", cwv.fid)
        if "cumulative-layout-shift" in audits:
            cwv.cls = audits["cumulative-layout-shift"].get("numericValue")
            cwv.cls_rating = self._rate_metric("cls", cwv.cls)
        if "experimental-interaction-to-next-paint" in audits:
            cwv.inp = audits["experimental-interaction-to-next-paint"].get("numericValue")
            cwv.inp_rating = self._rate_metric("inp", cwv.inp)
        if "server-response-time" in audits:
            cwv.ttfb = audits["server-response-time"].get("numericValue")
        if "first-contentful-paint" in audits:
            cwv.fcp = audits["first-contentful-paint"].get("numericValue")

    def _extract_field_cwv(self, cwv: CoreWebVitals, loading_exp: dict) -> None:
        """Override *cwv* with CrUX field data percentiles, when available.

        BUG FIX: field categories (FAST/AVERAGE/SLOW) are mapped to the lab
        rating vocabulary; previously the raw category was stored and never
        matched the POOR/NEEDS_IMPROVEMENT checks elsewhere. Also guards the
        CLS division against a missing percentile.
        """
        metrics = loading_exp.get("metrics", {})
        norm = self._FIELD_CATEGORY_MAP

        if "LARGEST_CONTENTFUL_PAINT_MS" in metrics:
            m = metrics["LARGEST_CONTENTFUL_PAINT_MS"]
            cwv.lcp = m.get("percentile")
            cwv.lcp_rating = norm.get(m.get("category"), m.get("category"))
        if "FIRST_INPUT_DELAY_MS" in metrics:
            m = metrics["FIRST_INPUT_DELAY_MS"]
            cwv.fid = m.get("percentile")
            cwv.fid_rating = norm.get(m.get("category"), m.get("category"))
        if "CUMULATIVE_LAYOUT_SHIFT_SCORE" in metrics:
            m = metrics["CUMULATIVE_LAYOUT_SHIFT_SCORE"]
            percentile = m.get("percentile")
            # CrUX reports CLS scaled x100; convert back to the unitless value.
            cwv.cls = percentile / 100 if percentile is not None else None
            cwv.cls_rating = norm.get(m.get("category"), m.get("category"))
        if "INTERACTION_TO_NEXT_PAINT" in metrics:
            m = metrics["INTERACTION_TO_NEXT_PAINT"]
            cwv.inp = m.get("percentile")
            cwv.inp_rating = norm.get(m.get("category"), m.get("category"))

    def _extract_audit_findings(self, result: PageSpeedResult, audits: dict) -> None:
        """Collect opportunities (sorted by savings), diagnostics and passed audits.

        Single pass over the audits map instead of the previous three loops;
        membership logic is unchanged.
        """
        for audit_id, audit in audits.items():
            details = audit.get("details") or {}
            detail_type = details.get("type")
            score = audit.get("score")
            if detail_type == "opportunity":
                savings = details.get("overallSavingsMs", 0)
                if savings > 0:
                    result.opportunities.append({
                        "id": audit_id,
                        "title": audit.get("title", ""),
                        "description": audit.get("description", ""),
                        "savings_ms": savings,
                        "score": audit.get("score", 0),
                    })
            elif details and score is not None and score < 1 and detail_type is not None:
                result.diagnostics.append({
                    "id": audit_id,
                    "title": audit.get("title", ""),
                    "description": audit.get("description", ""),
                    "score": score,
                })
            # Passed audits are collected independently of the branches above.
            if score == 1:
                result.passed_audits.append(audit.get("title", audit_id))
        result.opportunities.sort(key=lambda x: x["savings_ms"], reverse=True)

    def analyze_both_strategies(self, url: str) -> dict:
        """Analyze URL for both mobile and desktop, with a simple comparison."""
        mobile = self.analyze(url, strategy="mobile")
        desktop = self.analyze(url, strategy="desktop")
        return {
            "url": url,
            "mobile": mobile.to_dict(),
            "desktop": desktop.to_dict(),
            "comparison": {
                "performance_difference": (
                    (desktop.performance_score or 0) - (mobile.performance_score or 0)
                ),
                "mobile_first_issues": self._identify_mobile_issues(mobile, desktop),
            },
        }

    def _identify_mobile_issues(
        self,
        mobile: PageSpeedResult,
        desktop: PageSpeedResult,
    ) -> list[str]:
        """Identify issues that affect mobile more than desktop."""
        issues = []
        # BUG FIX: use `is not None` so a legitimate value of 0 still takes
        # part in the comparison (truthiness skipped zero scores/metrics).
        if mobile.performance_score is not None and desktop.performance_score is not None:
            if desktop.performance_score - mobile.performance_score > 20:
                issues.append("Significant performance gap between mobile and desktop")
        m_cwv = mobile.core_web_vitals
        d_cwv = desktop.core_web_vitals
        if m_cwv.lcp is not None and d_cwv.lcp is not None and m_cwv.lcp > d_cwv.lcp * 1.5:
            issues.append("LCP significantly slower on mobile")
        if m_cwv.cls is not None and d_cwv.cls is not None and m_cwv.cls > d_cwv.cls * 2:
            issues.append("Layout shift issues more severe on mobile")
        return issues

    def get_cwv_summary(self, url: str) -> dict:
        """Get a mobile-strategy summary focused on Core Web Vitals."""
        result = self.analyze(url, strategy="mobile")
        cwv = result.core_web_vitals
        # BUG FIX: `if cwv.cls` etc. rendered legitimate 0 values (e.g. a
        # perfect CLS of 0.0) as None; compare against None explicitly.
        return {
            "url": url,
            "overall_cwv_status": self._overall_cwv_status(cwv),
            "metrics": {
                "lcp": {
                    "value": f"{cwv.lcp / 1000:.2f}s" if cwv.lcp is not None else None,
                    "rating": cwv.lcp_rating,
                    "threshold": "≤ 2.5s good, > 4.0s poor",
                },
                "fid": {
                    "value": f"{cwv.fid:.0f}ms" if cwv.fid is not None else None,
                    "rating": cwv.fid_rating,
                    "threshold": "≤ 100ms good, > 300ms poor",
                },
                "cls": {
                    "value": f"{cwv.cls:.3f}" if cwv.cls is not None else None,
                    "rating": cwv.cls_rating,
                    "threshold": "≤ 0.1 good, > 0.25 poor",
                },
                "inp": {
                    "value": f"{cwv.inp:.0f}ms" if cwv.inp is not None else None,
                    "rating": cwv.inp_rating,
                    "threshold": "≤ 200ms good, > 500ms poor",
                },
            },
            "top_opportunities": result.opportunities[:5],
        }

    def _overall_cwv_status(self, cwv: CoreWebVitals) -> str:
        """Determine overall Core Web Vitals status from LCP/FID/CLS ratings.

        Worst rating wins: any POOR -> POOR, else any NEEDS_IMPROVEMENT ->
        NEEDS_IMPROVEMENT, else GOOD; UNKNOWN when no ratings are set.
        """
        ratings = [cwv.lcp_rating, cwv.fid_rating, cwv.cls_rating]
        ratings = [r for r in ratings if r]
        if not ratings:
            return "UNKNOWN"
        if any(r == "POOR" for r in ratings):
            return "POOR"
        if any(r == "NEEDS_IMPROVEMENT" for r in ratings):
            return "NEEDS_IMPROVEMENT"
        return "GOOD"

    def generate_report(self, result: PageSpeedResult) -> str:
        """Generate human-readable performance report."""

        def fmt_score(label: str, score: float | None) -> str:
            # BUG FIX: a valid score of 0 previously rendered as N/A.
            if score is None:
                return f" {label}: N/A"
            return f" {label}: {score:.0f}/100"

        lines = [
            "=" * 60,
            "PageSpeed Insights Report",
            "=" * 60,
            f"URL: {result.url}",
            f"Strategy: {result.strategy}",
            "",
            "Scores:",
            fmt_score("Performance", result.performance_score),
            fmt_score("SEO", result.seo_score),
            fmt_score("Accessibility", result.accessibility_score),
            fmt_score("Best Practices", result.best_practices_score),
            "",
            "Core Web Vitals:",
        ]
        cwv = result.core_web_vitals

        def format_metric(name: str, value: Any, rating: str | None, unit: str) -> str:
            if value is None:
                return f" {name}: N/A"
            rating_str = f" ({rating})" if rating else ""
            return f" {name}: {value}{unit}{rating_str}"

        # `is not None` everywhere below: a 0 value must still be formatted.
        lines.append(format_metric("LCP", f"{cwv.lcp / 1000:.2f}" if cwv.lcp is not None else None, cwv.lcp_rating, "s"))
        lines.append(format_metric("FID/TBT", f"{cwv.fid:.0f}" if cwv.fid is not None else None, cwv.fid_rating, "ms"))
        lines.append(format_metric("CLS", f"{cwv.cls:.3f}" if cwv.cls is not None else None, cwv.cls_rating, ""))
        lines.append(format_metric("INP", f"{cwv.inp:.0f}" if cwv.inp is not None else None, cwv.inp_rating, "ms"))
        lines.append(format_metric("TTFB", f"{cwv.ttfb:.0f}" if cwv.ttfb is not None else None, None, "ms"))
        lines.append(format_metric("FCP", f"{cwv.fcp / 1000:.2f}" if cwv.fcp is not None else None, None, "s"))
        if result.opportunities:
            lines.extend([
                "",
                f"Top Opportunities ({len(result.opportunities)} total):",
            ])
            for opp in result.opportunities[:5]:
                savings = opp["savings_ms"]
                lines.append(f" - {opp['title']}: -{savings / 1000:.1f}s potential savings")
        lines.extend(["", "=" * 60])
        return "\n".join(lines)
def main():
    """CLI entry point: analyze a URL and print or save the results.

    Modes:
        --cwv-only       Core Web Vitals summary (mobile strategy) as JSON.
        --strategy both  Mobile + desktop comparison as JSON.
        otherwise        Single-strategy analysis; JSON with --json/--output,
                         human-readable report by default.
    """
    parser = argparse.ArgumentParser(description="PageSpeed Insights Client")
    parser.add_argument("--url", "-u", required=True, help="URL to analyze")
    parser.add_argument("--strategy", "-s", default="mobile",
                        choices=["mobile", "desktop", "both"],
                        help="Analysis strategy")
    parser.add_argument("--output", "-o", help="Output file for JSON")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--cwv-only", action="store_true",
                        help="Show only Core Web Vitals summary")
    args = parser.parse_args()

    def emit(text: str) -> None:
        # Honor --output in every mode (it was previously silently ignored
        # for --cwv-only); fall back to stdout otherwise.
        if args.output:
            with open(args.output, "w") as f:
                f.write(text)
        else:
            print(text)

    client = PageSpeedClient()
    if args.cwv_only:
        emit(json.dumps(client.get_cwv_summary(args.url), indent=2))
    elif args.strategy == "both":
        emit(json.dumps(client.analyze_both_strategies(args.url), indent=2))
    else:
        result = client.analyze(args.url, strategy=args.strategy)
        if args.json or args.output:
            emit(json.dumps(result.to_dict(), indent=2))
        else:
            print(client.generate_report(result))


if __name__ == "__main__":
    main()