Fix SEO skill 34 bugs, Korean labels, and transition Ahrefs refs to our-seo-agent (#2)

This commit is contained in:
Andrew Yim
2026-02-14 01:09:35 +09:00
committed by GitHub
parent d2d0a2d460
commit a28bfbf847
34 changed files with 265 additions and 262 deletions

View File

@@ -98,12 +98,14 @@ python scripts/executive_report.py --report aggregated_report.json --audience c-
- Support for C-level, marketing team, and technical team audiences
- Markdown output format
## Ahrefs MCP Tools Used
## Data Sources
| Tool | Purpose |
|------|---------|
| `site-explorer-metrics` | Fresh current organic metrics snapshot |
| `site-explorer-metrics-history` | Historical metrics for trend visualization |
| Source | Purpose |
|--------|---------|
| `our-seo-agent` CLI | Primary data source (future); use `--input` flag to provide pre-fetched JSON |
| `--output-dir` flag | Scan local JSON files from skills 11-33 |
| WebSearch / WebFetch | Supplementary data for trend context |
| Notion MCP | Query past audits from SEO Audit Log database |
## Output Format

View File

@@ -20,8 +20,10 @@ from tenacity import (
retry_if_exception_type,
)
# Load environment variables
load_dotenv()
# Logging setup
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
@@ -34,6 +36,13 @@ class RateLimiter:
"""Rate limiter using token bucket algorithm."""
def __init__(self, rate: float, per: float = 1.0):
"""
Initialize rate limiter.
Args:
rate: Number of requests allowed
per: Time period in seconds (default: 1 second)
"""
self.rate = rate
self.per = per
self.tokens = rate
@@ -41,6 +50,7 @@ class RateLimiter:
self._lock = asyncio.Lock()
async def acquire(self) -> None:
"""Acquire a token, waiting if necessary."""
async with self._lock:
now = datetime.now()
elapsed = (now - self.last_update).total_seconds()
@@ -64,6 +74,14 @@ class BaseAsyncClient:
requests_per_second: float = 3.0,
logger: logging.Logger | None = None,
):
"""
Initialize base client.
Args:
max_concurrent: Maximum concurrent requests
requests_per_second: Rate limit
logger: Logger instance
"""
self.semaphore = Semaphore(max_concurrent)
self.rate_limiter = RateLimiter(requests_per_second)
self.logger = logger or logging.getLogger(self.__class__.__name__)
@@ -83,6 +101,7 @@ class BaseAsyncClient:
self,
coro: Callable[[], Any],
) -> Any:
"""Execute a request with rate limiting and retry."""
async with self.semaphore:
await self.rate_limiter.acquire()
self.stats["requests"] += 1
@@ -100,6 +119,7 @@ class BaseAsyncClient:
requests: list[Callable[[], Any]],
desc: str = "Processing",
) -> list[Any]:
"""Execute multiple requests concurrently."""
try:
from tqdm.asyncio import tqdm
has_tqdm = True
@@ -124,6 +144,7 @@ class BaseAsyncClient:
return await asyncio.gather(*tasks, return_exceptions=True)
def print_stats(self) -> None:
"""Print request statistics."""
self.logger.info("=" * 40)
self.logger.info("Request Statistics:")
self.logger.info(f" Total Requests: {self.stats['requests']}")
@@ -140,6 +161,8 @@ class ConfigManager:
@property
def google_credentials_path(self) -> str | None:
"""Get Google service account credentials path."""
# Prefer SEO-specific credentials, fallback to general credentials
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
if os.path.exists(seo_creds):
return seo_creds
@@ -147,23 +170,38 @@ class ConfigManager:
@property
def pagespeed_api_key(self) -> str | None:
"""Get PageSpeed Insights API key."""
return os.getenv("PAGESPEED_API_KEY")
@property
def custom_search_api_key(self) -> str | None:
"""Get Custom Search API key."""
return os.getenv("CUSTOM_SEARCH_API_KEY")
@property
def custom_search_engine_id(self) -> str | None:
"""Get Custom Search Engine ID."""
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
@property
def notion_token(self) -> str | None:
"""Get Notion API token."""
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
def validate_google_credentials(self) -> bool:
    """Return True when a Google credentials path is configured and the file exists.

    Relies on ``self.google_credentials_path`` for path resolution; a None or
    empty path short-circuits to False without touching the filesystem.
    """
    path = self.google_credentials_path
    return bool(path) and os.path.exists(path)
def get_required(self, key: str) -> str:
    """Return the value of environment variable *key*.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    value = os.environ.get(key)
    if value:
        return value
    raise ValueError(f"Missing required environment variable: {key}")
# Singleton config instance
config = ConfigManager()

View File

@@ -453,11 +453,13 @@ CATEGORY_KOREAN_LABELS: dict[str, str] = {
"competitor": "경쟁사",
"schema": "스키마",
"kpi": "KPI",
"search_console": "Search Console",
"comprehensive": "종합 감사",
"search_console": "서치 콘솔",
"ecommerce": "이커머스",
"international": "국제 SEO",
"ai_search": "AI 검색",
"entity_seo": "엔티티 SEO",
"migration": "사이트 이전",
}

View File

@@ -123,11 +123,11 @@ CATEGORY_LABELS_KR = {
"competitor": "경쟁 분석",
"schema": "스키마/구조화 데이터",
"kpi": "KPI 프레임워크",
"search_console": "Search Console",
"search_console": "서치 콘솔",
"ecommerce": "이커머스 SEO",
"international": "국제 SEO",
"ai_search": "AI 검색 가시성",
"entity_seo": "Knowledge Graph",
"entity_seo": "지식 그래프",
}
# Common English issue descriptions -> Korean translations
@@ -434,11 +434,11 @@ class ExecutiveReportGenerator:
grade_kr = HEALTH_LABELS_KR.get(grade, grade)
trend_kr = TREND_LABELS_KR.get(summary.health_trend, summary.health_trend)
lines.append("## Health Score")
lines.append("## 종합 건강 점수")
lines.append("")
lines.append(f"| 지표 | 값 |")
lines.append(f"|------|-----|")
lines.append(f"| Overall Score | **{summary.health_score}/100** |")
lines.append(f"| 종합 점수 | **{summary.health_score}/100** |")
lines.append(f"| 등급 | {grade_kr} |")
lines.append(f"| 추세 | {trend_kr} |")
lines.append("")

View File

@@ -55,7 +55,7 @@ SKILL_REGISTRY = {
28: {"name": "knowledge-graph", "category": "entity_seo", "weight": 0.10},
31: {"name": "competitor-intel", "category": "competitor", "weight": 0.15},
32: {"name": "crawl-budget", "category": "technical", "weight": 0.10},
33: {"name": "page-experience", "category": "performance", "weight": 0.10},
33: {"name": "migration-planner", "category": "migration", "weight": 0.10},
}
CATEGORY_WEIGHTS = {
@@ -69,6 +69,13 @@ CATEGORY_WEIGHTS = {
"competitor": 0.05,
"schema": 0.05,
"kpi": 0.05,
"comprehensive": 1.0,
"search_console": 0.05,
"ecommerce": 0.05,
"international": 0.05,
"ai_search": 0.05,
"entity_seo": 0.05,
"migration": 0.05,
}
@@ -255,14 +262,15 @@ class ReportAggregator(BaseAsyncClient):
# Extract health score — check top-level first, then nested data dict
score_found = False
for key in ("health_score", "overall_health", "score"):
for key in ("health_score", "overall_health", "overall_score", "score",
"technical_score", "efficiency_score", "onpage_score"):
if key in data:
try:
skill_output.health_score = float(data[key])
score_found = True
break
except (ValueError, TypeError):
pass
break
continue
if not score_found:
nested = data.get("data", {})
@@ -276,9 +284,9 @@ class ReportAggregator(BaseAsyncClient):
if val is not None:
try:
skill_output.health_score = float(val)
break
except (ValueError, TypeError):
pass
break
continue
# Extract audit date
for key in ("audit_date", "report_date", "timestamp", "found_date"):

View File

@@ -20,10 +20,10 @@ Aggregate outputs from all SEO skills (11-33) into stakeholder-ready executive r
## MCP Tool Usage
### Ahrefs for Fresh Data Pull
### SEO Data
```
mcp__ahrefs__site-explorer-metrics: Pull current organic metrics snapshot for dashboard
mcp__ahrefs__site-explorer-metrics-history: Pull historical metrics for trend visualization
our-seo-agent CLI: Primary data source (future); use --input for pre-fetched JSON
WebSearch / WebFetch: Supplementary live data
```
### Notion for Reading Past Audits and Writing Reports
@@ -42,7 +42,7 @@ mcp__perplexity__*: Enrich reports with industry benchmarks and competitor conte
### Dashboard Generation
1. Accept target domain and optional date range
2. Query Notion SEO Audit Log for all past audit entries for the domain
3. Optionally pull fresh metrics from Ahrefs (site-explorer-metrics, metrics-history)
3. Optionally pull fresh metrics from our-seo-agent CLI or provide pre-fetched JSON via --input
4. Normalize all skill outputs into unified format
5. Compute cross-skill health score with weighted category dimensions
6. Identify top issues (sorted by severity) and top wins across all audits