Fix SEO skill 34 bugs, Korean labels, and transition Ahrefs refs to our-seo-agent (#2)

This commit is contained in:
Andrew Yim
2026-02-14 01:09:35 +09:00
committed by GitHub
parent d2d0a2d460
commit a28bfbf847
34 changed files with 265 additions and 262 deletions

View File

@@ -98,12 +98,14 @@ python scripts/executive_report.py --report aggregated_report.json --audience c-
- Support for C-level, marketing team, and technical team audiences
- Markdown output format
## Ahrefs MCP Tools Used
## Data Sources
| Tool | Purpose |
|------|---------|
| `site-explorer-metrics` | Fresh current organic metrics snapshot |
| `site-explorer-metrics-history` | Historical metrics for trend visualization |
| Source | Purpose |
|--------|---------|
| `our-seo-agent` CLI | Primary data source (future); use `--input` flag to provide pre-fetched JSON |
| `--output-dir` flag | Scan local JSON files from skills 11-33 |
| WebSearch / WebFetch | Supplementary data for trend context |
| Notion MCP | Query past audits from SEO Audit Log database |
## Output Format

View File

@@ -20,8 +20,10 @@ from tenacity import (
retry_if_exception_type,
)
# Load environment variables
load_dotenv()
# Logging setup
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
@@ -34,6 +36,13 @@ class RateLimiter:
"""Rate limiter using token bucket algorithm."""
def __init__(self, rate: float, per: float = 1.0):
"""
Initialize rate limiter.
Args:
rate: Number of requests allowed
per: Time period in seconds (default: 1 second)
"""
self.rate = rate
self.per = per
self.tokens = rate
@@ -41,6 +50,7 @@ class RateLimiter:
self._lock = asyncio.Lock()
async def acquire(self) -> None:
"""Acquire a token, waiting if necessary."""
async with self._lock:
now = datetime.now()
elapsed = (now - self.last_update).total_seconds()
@@ -64,6 +74,14 @@ class BaseAsyncClient:
requests_per_second: float = 3.0,
logger: logging.Logger | None = None,
):
"""
Initialize base client.
Args:
max_concurrent: Maximum concurrent requests
requests_per_second: Rate limit
logger: Logger instance
"""
self.semaphore = Semaphore(max_concurrent)
self.rate_limiter = RateLimiter(requests_per_second)
self.logger = logger or logging.getLogger(self.__class__.__name__)
@@ -83,6 +101,7 @@ class BaseAsyncClient:
self,
coro: Callable[[], Any],
) -> Any:
"""Execute a request with rate limiting and retry."""
async with self.semaphore:
await self.rate_limiter.acquire()
self.stats["requests"] += 1
@@ -100,6 +119,7 @@ class BaseAsyncClient:
requests: list[Callable[[], Any]],
desc: str = "Processing",
) -> list[Any]:
"""Execute multiple requests concurrently."""
try:
from tqdm.asyncio import tqdm
has_tqdm = True
@@ -124,6 +144,7 @@ class BaseAsyncClient:
return await asyncio.gather(*tasks, return_exceptions=True)
def print_stats(self) -> None:
"""Print request statistics."""
self.logger.info("=" * 40)
self.logger.info("Request Statistics:")
self.logger.info(f" Total Requests: {self.stats['requests']}")
@@ -140,6 +161,8 @@ class ConfigManager:
@property
def google_credentials_path(self) -> str | None:
"""Get Google service account credentials path."""
# Prefer SEO-specific credentials, fallback to general credentials
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
if os.path.exists(seo_creds):
return seo_creds
@@ -147,23 +170,38 @@ class ConfigManager:
@property
def pagespeed_api_key(self) -> str | None:
"""Get PageSpeed Insights API key."""
return os.getenv("PAGESPEED_API_KEY")
@property
def custom_search_api_key(self) -> str | None:
"""Get Custom Search API key."""
return os.getenv("CUSTOM_SEARCH_API_KEY")
@property
def custom_search_engine_id(self) -> str | None:
"""Get Custom Search Engine ID."""
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
@property
def notion_token(self) -> str | None:
"""Get Notion API token."""
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
def validate_google_credentials(self) -> bool:
    """Report whether Google credentials are configured.

    Returns:
        ``True`` only when ``self.google_credentials_path`` is non-empty
        and that path exists on disk; ``False`` otherwise.
    """
    path = self.google_credentials_path
    # A missing/empty path short-circuits before touching the filesystem.
    return bool(path) and os.path.exists(path)
def get_required(self, key: str) -> str:
    """Return the value of a mandatory environment variable.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The variable's non-empty value.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    found = os.environ.get(key)
    if found:
        return found
    raise ValueError(f"Missing required environment variable: {key}")
# Singleton config instance: build one ConfigManager and bind it to `config`
config = ConfigManager()

View File

@@ -453,11 +453,13 @@ CATEGORY_KOREAN_LABELS: dict[str, str] = {
"competitor": "경쟁사",
"schema": "스키마",
"kpi": "KPI",
"search_console": "Search Console",
"comprehensive": "종합 감사",
"search_console": "서치 콘솔",
"ecommerce": "이커머스",
"international": "국제 SEO",
"ai_search": "AI 검색",
"entity_seo": "엔티티 SEO",
"migration": "사이트 이전",
}

View File

@@ -123,11 +123,11 @@ CATEGORY_LABELS_KR = {
"competitor": "경쟁 분석",
"schema": "스키마/구조화 데이터",
"kpi": "KPI 프레임워크",
"search_console": "Search Console",
"search_console": "서치 콘솔",
"ecommerce": "이커머스 SEO",
"international": "국제 SEO",
"ai_search": "AI 검색 가시성",
"entity_seo": "Knowledge Graph",
"entity_seo": "지식 그래프",
}
# Common English issue descriptions -> Korean translations
@@ -434,11 +434,11 @@ class ExecutiveReportGenerator:
grade_kr = HEALTH_LABELS_KR.get(grade, grade)
trend_kr = TREND_LABELS_KR.get(summary.health_trend, summary.health_trend)
lines.append("## Health Score")
lines.append("## 종합 건강 점수")
lines.append("")
lines.append(f"| 지표 | 값 |")
lines.append(f"|------|-----|")
lines.append(f"| Overall Score | **{summary.health_score}/100** |")
lines.append(f"| 종합 점수 | **{summary.health_score}/100** |")
lines.append(f"| 등급 | {grade_kr} |")
lines.append(f"| 추세 | {trend_kr} |")
lines.append("")

View File

@@ -55,7 +55,7 @@ SKILL_REGISTRY = {
28: {"name": "knowledge-graph", "category": "entity_seo", "weight": 0.10},
31: {"name": "competitor-intel", "category": "competitor", "weight": 0.15},
32: {"name": "crawl-budget", "category": "technical", "weight": 0.10},
33: {"name": "page-experience", "category": "performance", "weight": 0.10},
33: {"name": "migration-planner", "category": "migration", "weight": 0.10},
}
CATEGORY_WEIGHTS = {
@@ -69,6 +69,13 @@ CATEGORY_WEIGHTS = {
"competitor": 0.05,
"schema": 0.05,
"kpi": 0.05,
"comprehensive": 1.0,
"search_console": 0.05,
"ecommerce": 0.05,
"international": 0.05,
"ai_search": 0.05,
"entity_seo": 0.05,
"migration": 0.05,
}
@@ -255,14 +262,15 @@ class ReportAggregator(BaseAsyncClient):
# Extract health score — check top-level first, then nested data dict
score_found = False
for key in ("health_score", "overall_health", "score"):
for key in ("health_score", "overall_health", "overall_score", "score",
"technical_score", "efficiency_score", "onpage_score"):
if key in data:
try:
skill_output.health_score = float(data[key])
score_found = True
break
except (ValueError, TypeError):
pass
break
continue
if not score_found:
nested = data.get("data", {})
@@ -276,9 +284,9 @@ class ReportAggregator(BaseAsyncClient):
if val is not None:
try:
skill_output.health_score = float(val)
break
except (ValueError, TypeError):
pass
break
continue
# Extract audit date
for key in ("audit_date", "report_date", "timestamp", "found_date"):