directory changes and restructuring

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-22 02:01:41 +09:00
parent eea49f9f8c
commit 236be6c580
598 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,207 @@
"""
Base Client - Shared async client utilities
===========================================
Purpose: Rate-limited async operations for API clients
Python: 3.10+
"""
import asyncio
import logging
import os
import time
from asyncio import Semaphore
from datetime import datetime
from typing import Any, Callable, TypeVar

from dotenv import load_dotenv
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)
# Load environment variables
load_dotenv()
# Logging setup
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
T = TypeVar("T")
class RateLimiter:
    """Token-bucket rate limiter for async callers.

    Allows `rate` acquisitions per `per` seconds; tokens refill
    continuously in proportion to elapsed time.
    """

    def __init__(self, rate: float, per: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            rate: Number of requests allowed
            per: Time period in seconds (default: 1 second)
        """
        self.rate = rate
        self.per = per
        self.tokens = rate  # start with a full bucket
        # BUGFIX: use the monotonic clock for elapsed-time math. datetime.now()
        # is wall-clock time and can jump backward/forward (NTP sync, DST),
        # which would corrupt the refill calculation below.
        self.last_update = time.monotonic()
        self._lock = asyncio.Lock()

    async def acquire(self) -> None:
        """Acquire a token, sleeping if the bucket is empty.

        Holding the lock while sleeping intentionally serializes waiters,
        so tokens are granted roughly in FIFO order.
        """
        async with self._lock:
            now = time.monotonic()
            elapsed = now - self.last_update
            # Refill proportionally to elapsed time, capped at bucket size.
            self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
            self.last_update = now
            if self.tokens < 1:
                # Sleep exactly long enough for one whole token to accrue.
                wait_time = (1 - self.tokens) * (self.per / self.rate)
                await asyncio.sleep(wait_time)
                self.tokens = 0
            else:
                self.tokens -= 1
class BaseAsyncClient:
    """Base class for async API clients with rate limiting.

    Subclasses wrap individual API calls in `_rate_limited_request`
    (bounded concurrency + token-bucket throttling + retry) or fan out
    many calls with `batch_requests`.
    """

    def __init__(
        self,
        max_concurrent: int = 5,
        requests_per_second: float = 3.0,
        logger: logging.Logger | None = None,
    ):
        """
        Initialize base client.

        Args:
            max_concurrent: Maximum concurrent requests
            requests_per_second: Rate limit
            logger: Logger instance (defaults to one named after the subclass)
        """
        self.semaphore = Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_second)
        self.logger = logger or logging.getLogger(self.__class__.__name__)
        # Running counters reported by print_stats().
        # NOTE(review): "retries" is reserved; nothing increments it yet.
        self.stats = {
            "requests": 0,
            "success": 0,
            "errors": 0,
            "retries": 0,
        }

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(Exception),
    )
    async def _rate_limited_request(
        self,
        coro: Callable[[], Any],
    ) -> Any:
        """Execute one request with rate limiting and retry.

        tenacity re-invokes the whole method on failure (up to 3 attempts,
        exponential backoff), so the semaphore and rate limiter apply to
        every attempt. Re-raises the last exception when all attempts fail.
        """
        async with self.semaphore:
            await self.rate_limiter.acquire()
            self.stats["requests"] += 1
            try:
                result = await coro()
                self.stats["success"] += 1
                return result
            except Exception as e:
                self.stats["errors"] += 1
                self.logger.error(f"Request failed: {e}")
                raise

    async def batch_requests(
        self,
        requests: list[Callable[[], Any]],
        desc: str = "Processing",
    ) -> list[Any]:
        """Execute multiple requests concurrently.

        Results are returned in the same order as `requests` regardless of
        completion order; a failed request is mapped to {"error": str(e)}.
        """
        try:
            from tqdm.asyncio import tqdm
            has_tqdm = True
        except ImportError:
            has_tqdm = False

        async def execute(index: int, req: Callable) -> tuple[int, Any]:
            # Surface the failure as data instead of aborting the batch.
            try:
                return index, await self._rate_limited_request(req)
            except Exception as e:
                return index, {"error": str(e)}

        tasks = [execute(i, req) for i, req in enumerate(requests)]
        results: list[Any] = [None] * len(tasks)
        if has_tqdm:
            iterator = tqdm.as_completed(tasks, total=len(tasks), desc=desc)
        else:
            iterator = asyncio.as_completed(tasks)
        # BUGFIX: tqdm.as_completed yields in *completion* order, so with tqdm
        # installed results no longer matched the input order (while the
        # gather() fallback preserved it). Index-tagging restores a stable,
        # input-ordered result list on both paths.
        for coro in iterator:
            index, value = await coro
            results[index] = value
        return results

    def print_stats(self) -> None:
        """Print request statistics."""
        self.logger.info("=" * 40)
        self.logger.info("Request Statistics:")
        self.logger.info(f"  Total Requests: {self.stats['requests']}")
        self.logger.info(f"  Successful: {self.stats['success']}")
        self.logger.info(f"  Errors: {self.stats['errors']}")
        self.logger.info("=" * 40)
class ConfigManager:
"""Manage API configuration and credentials."""
def __init__(self):
load_dotenv()
@property
def google_credentials_path(self) -> str | None:
"""Get Google service account credentials path."""
# Prefer SEO-specific credentials, fallback to general credentials
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
if os.path.exists(seo_creds):
return seo_creds
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
@property
def pagespeed_api_key(self) -> str | None:
"""Get PageSpeed Insights API key."""
return os.getenv("PAGESPEED_API_KEY")
@property
def custom_search_api_key(self) -> str | None:
"""Get Custom Search API key."""
return os.getenv("CUSTOM_SEARCH_API_KEY")
@property
def custom_search_engine_id(self) -> str | None:
"""Get Custom Search Engine ID."""
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
@property
def notion_token(self) -> str | None:
"""Get Notion API token."""
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
def validate_google_credentials(self) -> bool:
"""Validate Google credentials are configured."""
creds_path = self.google_credentials_path
if not creds_path:
return False
return os.path.exists(creds_path)
def get_required(self, key: str) -> str:
"""Get required environment variable or raise error."""
value = os.getenv(key)
if not value:
raise ValueError(f"Missing required environment variable: {key}")
return value
# Module-level singleton: import `config` instead of constructing a
# ConfigManager (and re-running load_dotenv) at every call site.
config = ConfigManager()

View File

@@ -0,0 +1,452 @@
"""
PageSpeed Insights Client
=========================
Purpose: Get Core Web Vitals and performance data from PageSpeed Insights API
Python: 3.10+
Usage:
from pagespeed_client import PageSpeedClient
client = PageSpeedClient()
result = client.analyze("https://example.com")
"""
import argparse
import json
import logging
from dataclasses import dataclass, field
from typing import Any
import requests
from base_client import config
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
@dataclass
class CoreWebVitals:
"""Core Web Vitals metrics."""
lcp: float | None = None # Largest Contentful Paint (ms)
fid: float | None = None # First Input Delay (ms)
cls: float | None = None # Cumulative Layout Shift
inp: float | None = None # Interaction to Next Paint (ms)
ttfb: float | None = None # Time to First Byte (ms)
fcp: float | None = None # First Contentful Paint (ms)
# Assessment (GOOD, NEEDS_IMPROVEMENT, POOR)
lcp_rating: str | None = None
fid_rating: str | None = None
cls_rating: str | None = None
inp_rating: str | None = None
def to_dict(self) -> dict:
return {
"lcp": {"value": self.lcp, "rating": self.lcp_rating},
"fid": {"value": self.fid, "rating": self.fid_rating},
"cls": {"value": self.cls, "rating": self.cls_rating},
"inp": {"value": self.inp, "rating": self.inp_rating},
"ttfb": {"value": self.ttfb},
"fcp": {"value": self.fcp},
}
@dataclass
class PageSpeedResult:
    """PageSpeed analysis result for one URL/strategy pair."""

    url: str
    strategy: str  # mobile or desktop
    performance_score: float | None = None
    seo_score: float | None = None
    accessibility_score: float | None = None
    best_practices_score: float | None = None
    core_web_vitals: CoreWebVitals = field(default_factory=CoreWebVitals)
    opportunities: list[dict] = field(default_factory=list)
    diagnostics: list[dict] = field(default_factory=list)
    passed_audits: list[str] = field(default_factory=list)
    raw_data: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a JSON-friendly dict (opportunities truncated to 10)."""
        scores = {
            "performance": self.performance_score,
            "seo": self.seo_score,
            "accessibility": self.accessibility_score,
            "best_practices": self.best_practices_score,
        }
        summary = {
            "url": self.url,
            "strategy": self.strategy,
            "scores": scores,
            "core_web_vitals": self.core_web_vitals.to_dict(),
            "opportunities_count": len(self.opportunities),
            "opportunities": self.opportunities[:10],
            "diagnostics_count": len(self.diagnostics),
            "passed_audits_count": len(self.passed_audits),
        }
        return summary
class PageSpeedClient:
    """Client for the PageSpeed Insights v5 API.

    Fetches Lighthouse lab data plus CrUX field data for a URL and maps
    them into PageSpeedResult / CoreWebVitals objects.
    """

    BASE_URL = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"

    # Core Web Vitals thresholds (ms, except cls which is unitless).
    THRESHOLDS = {
        "lcp": {"good": 2500, "poor": 4000},
        "fid": {"good": 100, "poor": 300},
        "cls": {"good": 0.1, "poor": 0.25},
        "inp": {"good": 200, "poor": 500},
        "ttfb": {"good": 800, "poor": 1800},
        "fcp": {"good": 1800, "poor": 3000},
    }

    def __init__(self, api_key: str | None = None):
        """
        Initialize PageSpeed client.

        Args:
            api_key: PageSpeed API key (optional but recommended for higher quotas)
        """
        self.api_key = api_key or config.pagespeed_api_key
        self.session = requests.Session()

    def _rate_metric(self, metric: str, value: float | None) -> str | None:
        """Rate a metric against thresholds.

        Returns "GOOD" / "NEEDS_IMPROVEMENT" / "POOR", or None when the
        value is missing or the metric has no configured thresholds.
        """
        if value is None:
            return None
        thresholds = self.THRESHOLDS.get(metric)
        if not thresholds:
            return None
        if value <= thresholds["good"]:
            return "GOOD"
        elif value <= thresholds["poor"]:
            return "NEEDS_IMPROVEMENT"
        else:
            return "POOR"

    @staticmethod
    def _score_pct(category: dict) -> float | None:
        """Convert a Lighthouse 0-1 category score to a 0-100 percentage.

        BUGFIX: the previous truthiness check (`score * 100 if score`)
        collapsed a legitimate score of 0 to None; only a *missing* score
        should map to None.
        """
        score = category.get("score")
        return score * 100 if score is not None else None

    def analyze(
        self,
        url: str,
        strategy: str = "mobile",
        categories: list[str] | None = None,
    ) -> PageSpeedResult:
        """
        Analyze a URL with PageSpeed Insights.

        Args:
            url: URL to analyze
            strategy: "mobile" or "desktop"
            categories: Categories to analyze (performance, seo, accessibility, best-practices)

        Returns:
            PageSpeedResult with scores and metrics

        Raises:
            requests.RequestException: if the API call fails.
        """
        if categories is None:
            categories = ["performance", "seo", "accessibility", "best-practices"]
        params = {
            "url": url,
            "strategy": strategy,
            "category": categories,
        }
        if self.api_key:
            params["key"] = self.api_key
        try:
            response = self.session.get(self.BASE_URL, params=params, timeout=60)
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            logger.error(f"PageSpeed API request failed: {e}")
            raise

        result = PageSpeedResult(url=url, strategy=strategy, raw_data=data)
        lighthouse = data.get("lighthouseResult", {})

        # Category scores (0-100).
        categories_data = lighthouse.get("categories", {})
        for cat_id, attr in (
            ("performance", "performance_score"),
            ("seo", "seo_score"),
            ("accessibility", "accessibility_score"),
            ("best-practices", "best_practices_score"),
        ):
            if cat_id in categories_data:
                setattr(result, attr, self._score_pct(categories_data[cat_id]))

        audits = lighthouse.get("audits", {})
        cwv = result.core_web_vitals

        # Lab (Lighthouse) Core Web Vitals.
        if "largest-contentful-paint" in audits:
            cwv.lcp = audits["largest-contentful-paint"].get("numericValue")
            cwv.lcp_rating = self._rate_metric("lcp", cwv.lcp)
        if "total-blocking-time" in audits:
            # TBT is the lab proxy for FID (FID requires real user input).
            cwv.fid = audits["total-blocking-time"].get("numericValue")
            cwv.fid_rating = self._rate_metric("fid", cwv.fid)
        if "cumulative-layout-shift" in audits:
            cwv.cls = audits["cumulative-layout-shift"].get("numericValue")
            cwv.cls_rating = self._rate_metric("cls", cwv.cls)
        if "experimental-interaction-to-next-paint" in audits:
            cwv.inp = audits["experimental-interaction-to-next-paint"].get("numericValue")
            cwv.inp_rating = self._rate_metric("inp", cwv.inp)
        if "server-response-time" in audits:
            cwv.ttfb = audits["server-response-time"].get("numericValue")
        if "first-contentful-paint" in audits:
            cwv.fcp = audits["first-contentful-paint"].get("numericValue")

        # Field data (real user / CrUX) overrides lab data when available.
        loading_exp = data.get("loadingExperience", {})
        metrics = loading_exp.get("metrics", {})
        if "LARGEST_CONTENTFUL_PAINT_MS" in metrics:
            cwv.lcp = metrics["LARGEST_CONTENTFUL_PAINT_MS"].get("percentile")
            cwv.lcp_rating = metrics["LARGEST_CONTENTFUL_PAINT_MS"].get("category")
        if "FIRST_INPUT_DELAY_MS" in metrics:
            cwv.fid = metrics["FIRST_INPUT_DELAY_MS"].get("percentile")
            cwv.fid_rating = metrics["FIRST_INPUT_DELAY_MS"].get("category")
        if "CUMULATIVE_LAYOUT_SHIFT_SCORE" in metrics:
            # CrUX reports CLS scaled by 100.
            pct = metrics["CUMULATIVE_LAYOUT_SHIFT_SCORE"].get("percentile")
            if pct is not None:  # BUGFIX: avoid TypeError on a missing percentile
                cwv.cls = pct / 100
            cwv.cls_rating = metrics["CUMULATIVE_LAYOUT_SHIFT_SCORE"].get("category")
        if "INTERACTION_TO_NEXT_PAINT" in metrics:
            cwv.inp = metrics["INTERACTION_TO_NEXT_PAINT"].get("percentile")
            cwv.inp_rating = metrics["INTERACTION_TO_NEXT_PAINT"].get("category")

        # Classify every audit in a single pass:
        # opportunities (savings > 0), diagnostics (failed, non-opportunity
        # details), passed audits (score == 1).
        for audit_id, audit in audits.items():
            details = audit.get("details") or {}
            score = audit.get("score")
            if details.get("type") == "opportunity":
                savings = details.get("overallSavingsMs", 0)
                if savings > 0:
                    result.opportunities.append({
                        "id": audit_id,
                        "title": audit.get("title", ""),
                        "description": audit.get("description", ""),
                        "savings_ms": savings,
                        "score": audit.get("score", 0),
                    })
            elif score is not None and score < 1 and details.get("type") is not None:
                result.diagnostics.append({
                    "id": audit_id,
                    "title": audit.get("title", ""),
                    "description": audit.get("description", ""),
                    "score": score,
                })
            if score == 1:
                result.passed_audits.append(audit.get("title", audit_id))

        # Biggest potential savings first.
        result.opportunities.sort(key=lambda x: x["savings_ms"], reverse=True)
        return result

    def analyze_both_strategies(self, url: str) -> dict:
        """Analyze URL for both mobile and desktop and compare them."""
        mobile = self.analyze(url, strategy="mobile")
        desktop = self.analyze(url, strategy="desktop")
        return {
            "url": url,
            "mobile": mobile.to_dict(),
            "desktop": desktop.to_dict(),
            "comparison": {
                "performance_difference": (
                    (desktop.performance_score or 0) - (mobile.performance_score or 0)
                ),
                "mobile_first_issues": self._identify_mobile_issues(mobile, desktop),
            },
        }

    def _identify_mobile_issues(
        self,
        mobile: PageSpeedResult,
        desktop: PageSpeedResult,
    ) -> list[str]:
        """Identify issues that affect mobile more than desktop.

        BUGFIX: comparisons use `is not None` so a legitimate score or
        metric of exactly 0 still participates (previously skipped as falsy).
        """
        issues = []
        if mobile.performance_score is not None and desktop.performance_score is not None:
            if desktop.performance_score - mobile.performance_score > 20:
                issues.append("Significant performance gap between mobile and desktop")
        m_cwv = mobile.core_web_vitals
        d_cwv = desktop.core_web_vitals
        if m_cwv.lcp is not None and d_cwv.lcp is not None and m_cwv.lcp > d_cwv.lcp * 1.5:
            issues.append("LCP significantly slower on mobile")
        if m_cwv.cls is not None and d_cwv.cls is not None and m_cwv.cls > d_cwv.cls * 2:
            issues.append("Layout shift issues more severe on mobile")
        return issues

    def get_cwv_summary(self, url: str) -> dict:
        """Get a mobile-strategy summary focused on Core Web Vitals."""
        result = self.analyze(url, strategy="mobile")
        cwv = result.core_web_vitals
        # `is not None` so a metric of exactly 0 is still formatted.
        return {
            "url": url,
            "overall_cwv_status": self._overall_cwv_status(cwv),
            "metrics": {
                "lcp": {
                    "value": f"{cwv.lcp / 1000:.2f}s" if cwv.lcp is not None else None,
                    "rating": cwv.lcp_rating,
                    "threshold": "≤ 2.5s good, > 4.0s poor",
                },
                "fid": {
                    "value": f"{cwv.fid:.0f}ms" if cwv.fid is not None else None,
                    "rating": cwv.fid_rating,
                    "threshold": "≤ 100ms good, > 300ms poor",
                },
                "cls": {
                    "value": f"{cwv.cls:.3f}" if cwv.cls is not None else None,
                    "rating": cwv.cls_rating,
                    "threshold": "≤ 0.1 good, > 0.25 poor",
                },
                "inp": {
                    "value": f"{cwv.inp:.0f}ms" if cwv.inp is not None else None,
                    "rating": cwv.inp_rating,
                    "threshold": "≤ 200ms good, > 500ms poor",
                },
            },
            "top_opportunities": result.opportunities[:5],
        }

    def _overall_cwv_status(self, cwv: CoreWebVitals) -> str:
        """Worst-of rollup across LCP/FID/CLS ratings (UNKNOWN if none rated)."""
        ratings = [cwv.lcp_rating, cwv.fid_rating, cwv.cls_rating]
        ratings = [r for r in ratings if r]
        if not ratings:
            return "UNKNOWN"
        if any(r == "POOR" for r in ratings):
            return "POOR"
        if any(r == "NEEDS_IMPROVEMENT" for r in ratings):
            return "NEEDS_IMPROVEMENT"
        return "GOOD"

    def generate_report(self, result: PageSpeedResult) -> str:
        """Generate human-readable performance report.

        BUGFIX: all value checks use `is not None` so scores/metrics of
        exactly 0 are printed instead of showing N/A.
        """
        lines = [
            "=" * 60,
            "PageSpeed Insights Report",
            "=" * 60,
            f"URL: {result.url}",
            f"Strategy: {result.strategy}",
            "",
            "Scores:",
            f"  Performance: {result.performance_score:.0f}/100" if result.performance_score is not None else "  Performance: N/A",
            f"  SEO: {result.seo_score:.0f}/100" if result.seo_score is not None else "  SEO: N/A",
            f"  Accessibility: {result.accessibility_score:.0f}/100" if result.accessibility_score is not None else "  Accessibility: N/A",
            f"  Best Practices: {result.best_practices_score:.0f}/100" if result.best_practices_score is not None else "  Best Practices: N/A",
            "",
            "Core Web Vitals:",
        ]
        cwv = result.core_web_vitals

        def format_metric(name: str, value: Any, rating: str | None, unit: str) -> str:
            # One report line per metric; rating shown in parentheses when known.
            if value is None:
                return f"  {name}: N/A"
            rating_str = f" ({rating})" if rating else ""
            return f"  {name}: {value}{unit}{rating_str}"

        lines.append(format_metric("LCP", f"{cwv.lcp / 1000:.2f}" if cwv.lcp is not None else None, cwv.lcp_rating, "s"))
        lines.append(format_metric("FID/TBT", f"{cwv.fid:.0f}" if cwv.fid is not None else None, cwv.fid_rating, "ms"))
        lines.append(format_metric("CLS", f"{cwv.cls:.3f}" if cwv.cls is not None else None, cwv.cls_rating, ""))
        lines.append(format_metric("INP", f"{cwv.inp:.0f}" if cwv.inp is not None else None, cwv.inp_rating, "ms"))
        lines.append(format_metric("TTFB", f"{cwv.ttfb:.0f}" if cwv.ttfb is not None else None, None, "ms"))
        lines.append(format_metric("FCP", f"{cwv.fcp / 1000:.2f}" if cwv.fcp is not None else None, None, "s"))

        if result.opportunities:
            lines.extend([
                "",
                f"Top Opportunities ({len(result.opportunities)} total):",
            ])
            for opp in result.opportunities[:5]:
                savings = opp["savings_ms"]
                lines.append(f"  - {opp['title']}: -{savings / 1000:.1f}s potential savings")
        lines.extend(["", "=" * 60])
        return "\n".join(lines)
def main():
    """CLI entry point: analyze a URL and emit JSON or a text report."""
    parser = argparse.ArgumentParser(description="PageSpeed Insights Client")
    parser.add_argument("--url", "-u", required=True, help="URL to analyze")
    parser.add_argument(
        "--strategy", "-s",
        default="mobile",
        choices=["mobile", "desktop", "both"],
        help="Analysis strategy",
    )
    parser.add_argument("--output", "-o", help="Output file for JSON")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument(
        "--cwv-only",
        action="store_true",
        help="Show only Core Web Vitals summary",
    )
    args = parser.parse_args()

    client = PageSpeedClient()

    def deliver(payload: dict) -> None:
        # Serialize and either write to --output or print to stdout.
        text = json.dumps(payload, indent=2)
        if args.output:
            with open(args.output, "w") as fh:
                fh.write(text)
        else:
            print(text)

    if args.cwv_only:
        print(json.dumps(client.get_cwv_summary(args.url), indent=2))
    elif args.strategy == "both":
        deliver(client.analyze_both_strategies(args.url))
    else:
        result = client.analyze(args.url, strategy=args.strategy)
        if args.json or args.output:
            deliver(result.to_dict())
        else:
            print(client.generate_report(result))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,6 @@
# 15-seo-core-web-vitals dependencies
google-api-python-client>=2.100.0
requests>=2.31.0
python-dotenv>=1.0.0
tenacity>=8.2.0
rich>=13.7.0
typer>=0.9.0