New skills: - Skill 33: Site migration planner with redirect mapping and monitoring - Skill 34: Reporting dashboard with HTML charts and Korean executive reports Bug fixes (Skill 34 - report_aggregator.py): - Add audit_type fallback for skill identification (was only using audit_id prefix) - Extract health scores from nested data dict (technical_score, onpage_score, etc.) - Support subdomain matching in domain filter (blog.ourdigital.org matches ourdigital.org) - Skip self-referencing DASH- aggregated reports Bug fixes (Skill 20 - naver_serp_analyzer.py): - Remove VIEW tab selectors (removed by Naver in 2026) - Add new section detectors: books (도서), shortform (숏폼), influencer (인플루언서) Improvements (Skill 34 - dashboard/executive report): - Add Korean category labels for Chart.js charts (기술 SEO, 온페이지, etc.) - Add Korean trend labels (개선 중 ↑, 안정 →, 하락 중 ↓) - Add English→Korean issue description translation layer (20 common patterns) Documentation improvements: - Add Korean triggers to 4 skill descriptions (19, 25, 28, 31) - Expand Skill 32 SKILL.md from 40→143 lines (was 6/10, added workflow, output format, limitations) - Add output format examples to Skills 27 and 28 SKILL.md - Add limitations sections to Skills 27 and 28 - Update README.md, CLAUDE.md, AGENTS.md for skills 33-34 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
170 lines
4.8 KiB
Python
170 lines
4.8 KiB
Python
"""
|
|
Base Client - Shared async client utilities
|
|
===========================================
|
|
Purpose: Rate-limited async operations for API clients
|
|
Python: 3.10+
|
|
"""
|
|
|
|
import asyncio
import logging
import os
import time
from asyncio import Semaphore
from datetime import datetime
from typing import Any, Callable, TypeVar

from dotenv import load_dotenv
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)
|
|
|
|
load_dotenv()
|
|
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
|
)
|
|
|
|
T = TypeVar("T")
|
|
|
|
|
|
class RateLimiter:
|
|
"""Rate limiter using token bucket algorithm."""
|
|
|
|
def __init__(self, rate: float, per: float = 1.0):
|
|
self.rate = rate
|
|
self.per = per
|
|
self.tokens = rate
|
|
self.last_update = datetime.now()
|
|
self._lock = asyncio.Lock()
|
|
|
|
async def acquire(self) -> None:
|
|
async with self._lock:
|
|
now = datetime.now()
|
|
elapsed = (now - self.last_update).total_seconds()
|
|
self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
|
|
self.last_update = now
|
|
|
|
if self.tokens < 1:
|
|
wait_time = (1 - self.tokens) * (self.per / self.rate)
|
|
await asyncio.sleep(wait_time)
|
|
self.tokens = 0
|
|
else:
|
|
self.tokens -= 1
|
|
|
|
|
|
class BaseAsyncClient:
|
|
"""Base class for async API clients with rate limiting."""
|
|
|
|
def __init__(
|
|
self,
|
|
max_concurrent: int = 5,
|
|
requests_per_second: float = 3.0,
|
|
logger: logging.Logger | None = None,
|
|
):
|
|
self.semaphore = Semaphore(max_concurrent)
|
|
self.rate_limiter = RateLimiter(requests_per_second)
|
|
self.logger = logger or logging.getLogger(self.__class__.__name__)
|
|
self.stats = {
|
|
"requests": 0,
|
|
"success": 0,
|
|
"errors": 0,
|
|
"retries": 0,
|
|
}
|
|
|
|
@retry(
|
|
stop=stop_after_attempt(3),
|
|
wait=wait_exponential(multiplier=1, min=2, max=10),
|
|
retry=retry_if_exception_type(Exception),
|
|
)
|
|
async def _rate_limited_request(
|
|
self,
|
|
coro: Callable[[], Any],
|
|
) -> Any:
|
|
async with self.semaphore:
|
|
await self.rate_limiter.acquire()
|
|
self.stats["requests"] += 1
|
|
try:
|
|
result = await coro()
|
|
self.stats["success"] += 1
|
|
return result
|
|
except Exception as e:
|
|
self.stats["errors"] += 1
|
|
self.logger.error(f"Request failed: {e}")
|
|
raise
|
|
|
|
async def batch_requests(
|
|
self,
|
|
requests: list[Callable[[], Any]],
|
|
desc: str = "Processing",
|
|
) -> list[Any]:
|
|
try:
|
|
from tqdm.asyncio import tqdm
|
|
has_tqdm = True
|
|
except ImportError:
|
|
has_tqdm = False
|
|
|
|
async def execute(req: Callable) -> Any:
|
|
try:
|
|
return await self._rate_limited_request(req)
|
|
except Exception as e:
|
|
return {"error": str(e)}
|
|
|
|
tasks = [execute(req) for req in requests]
|
|
|
|
if has_tqdm:
|
|
results = []
|
|
for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc):
|
|
result = await coro
|
|
results.append(result)
|
|
return results
|
|
else:
|
|
return await asyncio.gather(*tasks, return_exceptions=True)
|
|
|
|
def print_stats(self) -> None:
|
|
self.logger.info("=" * 40)
|
|
self.logger.info("Request Statistics:")
|
|
self.logger.info(f" Total Requests: {self.stats['requests']}")
|
|
self.logger.info(f" Successful: {self.stats['success']}")
|
|
self.logger.info(f" Errors: {self.stats['errors']}")
|
|
self.logger.info("=" * 40)
|
|
|
|
|
|
class ConfigManager:
|
|
"""Manage API configuration and credentials."""
|
|
|
|
def __init__(self):
|
|
load_dotenv()
|
|
|
|
@property
|
|
def google_credentials_path(self) -> str | None:
|
|
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
|
|
if os.path.exists(seo_creds):
|
|
return seo_creds
|
|
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
|
|
|
@property
|
|
def pagespeed_api_key(self) -> str | None:
|
|
return os.getenv("PAGESPEED_API_KEY")
|
|
|
|
@property
|
|
def notion_token(self) -> str | None:
|
|
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
|
|
|
|
def validate_google_credentials(self) -> bool:
|
|
creds_path = self.google_credentials_path
|
|
if not creds_path:
|
|
return False
|
|
return os.path.exists(creds_path)
|
|
|
|
def get_required(self, key: str) -> str:
|
|
value = os.getenv(key)
|
|
if not value:
|
|
raise ValueError(f"Missing required environment variable: {key}")
|
|
return value
|
|
|
|
|
|
config = ConfigManager()
|